/usr/src/gcc-4.9/debian/patches/gcc-linaro.diff

# DP: Changes for the Linaro 4.9-2015.10 release.

LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@229467 \
    svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@229665 \
 | filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/

--- a/src/libitm/ChangeLog.linaro
+++ b/src/libitm/ChangeLog.linaro
@@ -0,0 +1,92 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213035.
+	2014-07-24  Richard Henderson  <rth@redhat.com>
+
+	* config/aarch64/sjlj.S (_ITM_beginTransaction): Use post-inc
+	addressing mode in epilogue.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210615.
+	2014-05-19  Richard Henderson  <rth@redhat.com>
+
+	* config/aarch64/sjlj.S: New file.
+	* config/aarch64/target.h: New file.
+	* configure.tgt: Enable aarch64.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libgomp/ChangeLog.linaro
+++ b/src/libgomp/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libquadmath/ChangeLog.linaro
+++ b/src/libquadmath/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libsanitizer/ChangeLog.linaro
+++ b/src/libsanitizer/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/zlib/ChangeLog.linaro
+++ b/src/zlib/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libstdc++-v3/ChangeLog.linaro
+++ b/src/libstdc++-v3/ChangeLog.linaro
@@ -0,0 +1,94 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216444.
+	2014-10-19  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* testsuite/lib/libstdc++.exp (v3-copy-file): New proc split from ...
+	(v3-copy-files): ... this.  Update.
+	(check_v3_target_fileio): Fix race on cin_unget-1.txt file.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215101.
+	2014-09-10  Tony Wang  <tony.wang@arm.com>
+
+	PR target/56846
+	* libsupc++/eh_personality.cc (PERSONALITY_FUNCTION):
+	Return with CONTINUE_UNWINDING when the state pattern
+	contains: _US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/src/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -63,19 +63,24 @@
     verbose "++ $var is $val" $n
 }
 
+# Copy file to the target.
+proc v3-copy-file {src dst} {
+    if { [catch { set symlink [file readlink $src] } x] } then {
+	remote_download target $src $dst
+    } else {
+	if { [regexp "^/" "$symlink"] } then {
+	    remote_download target $symlink $dst
+	} else {
+	    set dirname [file dirname $f]
+	    remote_download target $dirname/$symlink $dst
+	}
+    }
+}
+
 # Called by v3-init below.  "Static" to this file.
 proc v3-copy-files {srcfiles} {
     foreach f $srcfiles {
-        if { [catch { set symlink [file readlink $f] } x] } then {
-	    remote_download target $f
-        } else {
-            if { [regexp "^/" "$symlink"] } then {
-		remote_download target $symlink
-            } else {
-                set dirname [file dirname $f]
-		remote_download target $dirname/$symlink
-            }
-        }
+	v3-copy-file $f [file tail $f]
     }
 }
 
@@ -685,8 +690,8 @@
 	# the file functions
 	set src fileio[pid].cc
 	set exe fileio[pid].x
-	set testfile "cin_unget-1.txt"
-	v3-copy-files "$srcdir/data/$testfile"
+	set testfile "cin_unget-1.[pid].txt"
+	v3-copy-file "$srcdir/data/cin_unget-1.txt" "$testfile"
 
 	set f [open $src "w"]
 	puts $f "#include <sys/types.h>"
--- a/src/configure.ac
+++ b/src/configure.ac
@@ -331,7 +331,8 @@
     if test "$is_elf" = "yes"; then
       # Check for target supported by gold.
       case "${target}" in
-        i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*)
+        i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+        | aarch64*-*-* | tilegx*-*-*)
 	  configdirs="$configdirs gold"
 	  if test x${ENABLE_GOLD} = xdefault; then
 	    default_ld=gold
--- a/src/intl/ChangeLog.linaro
+++ b/src/intl/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/ChangeLog.linaro
+++ b/src/ChangeLog.linaro
@@ -0,0 +1,83 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215865.
+	2014-10-03  Jing Yu  <jingyu@google.com>
+
+	* configure.ac: Add aarch64 to list of targets that support gold.
+	* configure: Regenerate.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/boehm-gc/ChangeLog.linaro
+++ b/src/boehm-gc/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/include/ChangeLog.linaro
+++ b/src/include/ChangeLog.linaro
@@ -0,0 +1,82 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209649.
+	2014-04-22  Yufeng Zhang  <yufeng.zhang@arm.com>
+
+	* longlong.h: Merge from glibc.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/include/longlong.h
+++ b/src/include/longlong.h
@@ -1,5 +1,5 @@
 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
-   Copyright (C) 1991-2013 Free Software Foundation, Inc.
+   Copyright (C) 1991-2014 Free Software Foundation, Inc.
 
    This file is part of the GNU C Library.
 
@@ -122,6 +122,22 @@
 #define __AND_CLOBBER_CC , "cc"
 #endif /* __GNUC__ < 2 */
 
+#if defined (__aarch64__)
+
+#if W_TYPE_SIZE == 32
+#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* W_TYPE_SIZE == 32 */
+
+#if W_TYPE_SIZE == 64
+#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clzll (X))
+#define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctzll (X))
+#define COUNT_LEADING_ZEROS_0 64
+#endif /* W_TYPE_SIZE == 64 */
+
+#endif /* __aarch64__ */
+
 #if defined (__alpha) && W_TYPE_SIZE == 64
 #define umul_ppmm(ph, pl, m0, m1) \
   do {									\
--- a/src/libiberty/ChangeLog.linaro
+++ b/src/libiberty/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/lto-plugin/ChangeLog.linaro
+++ b/src/lto-plugin/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/contrib/regression/ChangeLog.linaro
+++ b/src/contrib/regression/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/contrib/ChangeLog.linaro
+++ b/src/contrib/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/contrib/reghunt/ChangeLog.linaro
+++ b/src/contrib/reghunt/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libatomic/ChangeLog.linaro
+++ b/src/libatomic/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/config/ChangeLog.linaro
+++ b/src/config/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libbacktrace/ChangeLog.linaro
+++ b/src/libbacktrace/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libjava/libltdl/ChangeLog.linaro
+++ b/src/libjava/libltdl/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libjava/ChangeLog.linaro
+++ b/src/libjava/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libjava/classpath/ChangeLog.linaro
+++ b/src/libjava/classpath/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gnattools/ChangeLog.linaro
+++ b/src/gnattools/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/maintainer-scripts/ChangeLog.linaro
+++ b/src/maintainer-scripts/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/configure
+++ b/src/configure
@@ -2971,7 +2971,8 @@
     if test "$is_elf" = "yes"; then
       # Check for target supported by gold.
       case "${target}" in
-        i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*)
+        i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
+        | aarch64*-*-* | tilegx*-*-*)
 	  configdirs="$configdirs gold"
 	  if test x${ENABLE_GOLD} = xdefault; then
 	    default_ld=gold
--- a/src/libgcc/config.host
+++ b/src/libgcc/config.host
@@ -316,13 +316,15 @@
 case ${host} in
 aarch64*-*-elf)
 	extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
+	extra_parts="$extra_parts crtfastmath.o"
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
-	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
+	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	;;
 aarch64*-*-linux*)
+	extra_parts="$extra_parts crtfastmath.o"
 	md_unwind_header=aarch64/linux-unwind.h
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
-	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
+	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	;;
 alpha*-*-linux*)
 	tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm alpha/t-linux"
--- a/src/libgcc/ChangeLog.linaro
+++ b/src/libgcc/ChangeLog.linaro
@@ -0,0 +1,93 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215013.
+	2014-09-08  Joseph Myers  <joseph@codesourcery.com>
+
+	* fp-bit.c (pack_d, unpack_d): Remove LARGEST_EXPONENT_IS_NORMAL
+	and ROUND_TOWARDS_ZERO conditionals.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215086.
+	2014-09-09  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+	Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config.host (aarch64*): Include crtfastmath.o and
+	t-crtfm.
+	* config/aarch64/crtfastmath.c: New file.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libgcc/config/aarch64/crtfastmath.c
+++ b/src/libgcc/config/aarch64/crtfastmath.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2014 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#define _FPU_FPCR_FZ 0x1000000
+
+#define _FPU_SETCW(fpcr)				   \
+  {							   \
+    __asm__ __volatile__ ("msr	fpcr, %0" : : "r" (fpcr)); \
+  }
+
+static void __attribute__((constructor))
+set_fast_math (void)
+{
+  /* Flush to zero, round to nearest, IEEE exceptions disabled.  */
+  _FPU_SETCW (_FPU_FPCR_FZ);
+}
--- a/src/libgcc/config/arm/bpabi-v6m.S
+++ b/src/libgcc/config/arm/bpabi-v6m.S
@@ -150,7 +150,7 @@
 	mov r0, sp
 	push {r0, lr}
 	ldr r0, [sp, #8]
-	bl SYM(__gnu_uldivmod_helper)
+	bl SYM(__udivmoddi4)
 	ldr r3, [sp, #4]
 	mov lr, r3
 	add sp, sp, #8
--- a/src/libgcc/config/arm/bpabi.c
+++ b/src/libgcc/config/arm/bpabi.c
@@ -26,9 +26,6 @@
 extern unsigned long long __udivdi3 (unsigned long long, 
 				     unsigned long long);
 extern long long __gnu_ldivmod_helper (long long, long long, long long *);
-extern unsigned long long __gnu_uldivmod_helper (unsigned long long, 
-						 unsigned long long, 
-						 unsigned long long *);
 
 
 long long
@@ -43,14 +40,3 @@
   return quotient;
 }
 
-unsigned long long
-__gnu_uldivmod_helper (unsigned long long a, 
-		       unsigned long long b,
-		       unsigned long long *remainder)
-{
-  unsigned long long quotient;
-
-  quotient = __udivdi3 (a, b);
-  *remainder = a - b * quotient;
-  return quotient;
-}
--- a/src/libgcc/config/arm/bpabi.S
+++ b/src/libgcc/config/arm/bpabi.S
@@ -22,6 +22,8 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+	.cfi_sections .debug_frame
+
 #ifdef __ARM_EABI__
 /* Some attributes that are common to all routines in this file.  */
 	/* Tag_ABI_align_needed: This code does not require 8-byte
@@ -126,49 +128,137 @@
 #endif
 .endm
 
+/* we can use STRD/LDRD on v5TE and later, and any Thumb-2 architecture. */
+#if (defined(__ARM_EABI__)                                            \
+     && (defined(__thumb2__)                                          \
+         || (__ARM_ARCH >= 5 && defined(__TARGET_FEATURE_DSP))))
+#define CAN_USE_LDRD 1
+#else
+#define CAN_USE_LDRD 0
+#endif
+
+/* set up stack from for call to __udivmoddi4. At the end of the macro the
+   stack is arranged as follows:
+		sp+12	/ space for remainder
+		sp+8	\ (written by __udivmoddi4)
+		sp+4	lr
+		sp+0	sp+8 [rp (remainder pointer) argument for __udivmoddi4]
+
+ */
+.macro push_for_divide fname
+#if defined(__thumb2__) && CAN_USE_LDRD
+	sub	ip, sp, #8
+	strd	ip, lr, [sp, #-16]!
+#else
+	sub	sp, sp, #8
+	do_push	{sp, lr}
+#endif
+	.cfi_adjust_cfa_offset 16
+	.cfi_offset 14, -12
+.endm
+
+/* restore stack */
+.macro pop_for_divide
+	ldr	lr, [sp, #4]
+#if CAN_USE_LDRD
+	ldrd	r2, r3, [sp, #8]
+	add	sp, sp, #16
+#else
+	add	sp, sp, #8
+	do_pop	{r2, r3}
+#endif
+	.cfi_restore 14
+	.cfi_adjust_cfa_offset 0
+.endm
+
 #ifdef L_aeabi_ldivmod
 
+/* Perform 64 bit signed division.
+   Inputs:
+	r0:r1	numerator
+	r2:r3	denominator
+   Outputs:
+	r0:r1	quotient
+	r2:r3	remainder
+ */
 ARM_FUNC_START aeabi_ldivmod
-	cfi_start	__aeabi_ldivmod, LSYM(Lend_aeabi_ldivmod)
-	test_div_by_zero signed
+	.cfi_startproc
+	test_div_by_zero	signed
 
-	sub sp, sp, #8
-#if defined(__thumb2__)
-	mov ip, sp
-	push {ip, lr}
-#else
-	do_push {sp, lr}
-#endif
-98:	cfi_push 98b - __aeabi_ldivmod, 0xe, -0xc, 0x10
-	bl SYM(__gnu_ldivmod_helper) __PLT__
-	ldr lr, [sp, #4]
-	add sp, sp, #8
-	do_pop {r2, r3}
+	push_for_divide	__aeabi_ldivmod
+	cmp	xxh, #0
+	blt	1f
+	cmp	yyh, #0
+	blt	2f
+	/* arguments in (r0:r1), (r2:r3) and *sp */
+	bl	SYM(__udivmoddi4) __PLT__
+	.cfi_remember_state
+	pop_for_divide
 	RET
-	cfi_end	LSYM(Lend_aeabi_ldivmod)
+
+1: /* xxh:xxl is negative */
+	.cfi_restore_state
+	negs	xxl, xxl
+	sbc	xxh, xxh, xxh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	cmp	yyh, #0
+	blt	3f
+	/* arguments in (r0:r1), (r2:r3) and *sp */
+	bl	SYM(__udivmoddi4) __PLT__
+	.cfi_remember_state
+	pop_for_divide
+	negs	xxl, xxl
+	sbc	xxh, xxh, xxh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	negs	yyl, yyl
+	sbc	yyh, yyh, yyh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	RET
+
+2: /* only yyh:yyl is negative */
+	.cfi_restore_state
+	negs	yyl, yyl
+	sbc	yyh, yyh, yyh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	/* arguments in (r0:r1), (r2:r3) and *sp */
+	bl	SYM(__udivmoddi4) __PLT__
+	.cfi_remember_state
+	pop_for_divide
+	negs	xxl, xxl
+	sbc	xxh, xxh, xxh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	RET
+
+3: /* both xxh:xxl and yyh:yyl are negative */
+	.cfi_restore_state
+	negs	yyl, yyl
+	sbc	yyh, yyh, yyh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	/* arguments in (r0:r1), (r2:r3) and *sp */
+	bl	SYM(__udivmoddi4) __PLT__
+	pop_for_divide
+	negs	yyl, yyl
+	sbc	yyh, yyh, yyh, lsl #1	/* Thumb-2 has no RSC, so use X - 2X */
+	RET
+
+	.cfi_endproc
 	
 #endif /* L_aeabi_ldivmod */
 
 #ifdef L_aeabi_uldivmod
 
+/* Perform 64 bit signed division.
+   Inputs:
+	r0:r1	numerator
+	r2:r3	denominator
+   Outputs:
+	r0:r1	quotient
+	r2:r3	remainder
+ */
 ARM_FUNC_START aeabi_uldivmod
-	cfi_start	__aeabi_uldivmod, LSYM(Lend_aeabi_uldivmod)
-	test_div_by_zero unsigned
+	.cfi_startproc
+	test_div_by_zero	unsigned
 
-	sub sp, sp, #8
-#if defined(__thumb2__)
-	mov ip, sp
-	push {ip, lr}
-#else
-	do_push {sp, lr}
-#endif
-98:	cfi_push 98b - __aeabi_uldivmod, 0xe, -0xc, 0x10
-	bl SYM(__gnu_uldivmod_helper) __PLT__
-	ldr lr, [sp, #4]
-	add sp, sp, #8
-	do_pop {r2, r3}
+	push_for_divide	__aeabi_uldivmod
+	/* arguments in (r0:r1), (r2:r3) and *sp */
+	bl	SYM(__udivmoddi4) __PLT__
+	pop_for_divide
 	RET
-	cfi_end	LSYM(Lend_aeabi_uldivmod)
+	.cfi_endproc
 
 #endif /* L_aeabi_divmod */
 	
--- a/src/libgcc/config/libbid/ChangeLog.linaro
+++ b/src/libgcc/config/libbid/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libgcc/fp-bit.c
+++ b/src/libgcc/fp-bit.c
@@ -202,17 +202,9 @@
   int sign = src->sign;
   int exp = 0;
 
-  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src)))
+  if (isnan (src))
     {
-      /* We can't represent these values accurately.  By using the
-	 largest possible magnitude, we guarantee that the conversion
-	 of infinity is at least as big as any finite number.  */
       exp = EXPMAX;
-      fraction = ((fractype) 1 << FRACBITS) - 1;
-    }
-  else if (isnan (src))
-    {
-      exp = EXPMAX;
       /* Restore the NaN's payload.  */
       fraction >>= NGARDS;
       fraction &= QUIET_NAN - 1;
@@ -291,8 +283,7 @@
 	  fraction >>= NGARDS;
 #endif /* NO_DENORMALS */
 	}
-      else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
-	       && __builtin_expect (src->normal_exp > EXPBIAS, 0))
+      else if (__builtin_expect (src->normal_exp > EXPBIAS, 0))
 	{
 	  exp = EXPMAX;
 	  fraction = 0;
@@ -300,35 +291,25 @@
       else
 	{
 	  exp = src->normal_exp + EXPBIAS;
-	  if (!ROUND_TOWARDS_ZERO)
+	  /* IF the gard bits are the all zero, but the first, then we're
+	     half way between two numbers, choose the one which makes the
+	     lsb of the answer 0.  */
+	  if ((fraction & GARDMASK) == GARDMSB)
 	    {
-	      /* IF the gard bits are the all zero, but the first, then we're
-		 half way between two numbers, choose the one which makes the
-		 lsb of the answer 0.  */
-	      if ((fraction & GARDMASK) == GARDMSB)
-		{
-		  if (fraction & (1 << NGARDS))
-		    fraction += GARDROUND + 1;
-		}
-	      else
-		{
-		  /* Add a one to the guards to round up */
-		  fraction += GARDROUND;
-		}
-	      if (fraction >= IMPLICIT_2)
-		{
-		  fraction >>= 1;
-		  exp += 1;
-		}
+	      if (fraction & (1 << NGARDS))
+		fraction += GARDROUND + 1;
 	    }
-	  fraction >>= NGARDS;
-
-	  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX)
+	  else
 	    {
-	      /* Saturate on overflow.  */
-	      exp = EXPMAX;
-	      fraction = ((fractype) 1 << FRACBITS) - 1;
+	      /* Add a one to the guards to round up */
+	      fraction += GARDROUND;
 	    }
+	  if (fraction >= IMPLICIT_2)
+	    {
+	      fraction >>= 1;
+	      exp += 1;
+	    }
+	  fraction >>= NGARDS;
 	}
     }
 
@@ -556,8 +537,7 @@
 	  dst->fraction.ll = fraction;
 	}
     }
-  else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
-	   && __builtin_expect (exp == EXPMAX, 0))
+  else if (__builtin_expect (exp == EXPMAX, 0))
     {
       /* Huge exponent*/
       if (fraction == 0)
@@ -915,7 +895,7 @@
       low <<= 1;
     }
 
-  if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB)
+  if ((high & GARDMASK) == GARDMSB)
     {
       if (high & (1 << NGARDS))
 	{
@@ -1035,7 +1015,7 @@
 	numerator *= 2;
       }
 
-    if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
+    if ((quotient & GARDMASK) == GARDMSB)
       {
 	if (quotient & (1 << NGARDS))
 	  {
--- a/src/libdecnumber/ChangeLog.linaro
+++ b/src/libdecnumber/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/LINARO-VERSION
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
+Snapshot 4.9-2015.10
--- a/src/gcc/targhooks.c
+++ b/src/gcc/targhooks.c
@@ -1357,7 +1357,62 @@
 #endif
 }
 
+/* For hooks which use the MOVE_RATIO macro, this gives the legacy default
+   behaviour.  SPEED_P is true if we are compiling for speed.  */
+
+static unsigned int
+get_move_ratio (bool speed_p ATTRIBUTE_UNUSED)
+{
+  unsigned int move_ratio;
+#ifdef MOVE_RATIO
+  move_ratio = (unsigned int) MOVE_RATIO (speed_p);
+#else
+#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti)
+  move_ratio = 2;
+#else /* No movmem patterns, pick a default.  */
+  move_ratio = ((speed_p) ? 15 : 3);
+#endif
+#endif
+  return move_ratio;
+}
+
+/* Return TRUE if the move_by_pieces/set_by_pieces infrastructure should be
+   used; return FALSE if the movmem/setmem optab should be expanded, or
+   a call to memcpy emitted.  */
+
 bool
+default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+					unsigned int alignment,
+					enum by_pieces_operation op,
+					bool speed_p)
+{
+  unsigned int max_size = 0;
+  unsigned int ratio = 0;
+
+  switch (op)
+    {
+      case CLEAR_BY_PIECES:
+	max_size = STORE_MAX_PIECES;
+	ratio = CLEAR_RATIO (speed_p);
+	break;
+      case MOVE_BY_PIECES:
+	max_size = MOVE_MAX_PIECES;
+	ratio = get_move_ratio (speed_p);
+	break;
+      case SET_BY_PIECES:
+	max_size = STORE_MAX_PIECES;
+	ratio = SET_RATIO (speed_p);
+	break;
+      case STORE_BY_PIECES:
+	max_size = STORE_MAX_PIECES;
+	ratio = get_move_ratio (speed_p);
+	break;
+    }
+
+  return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
+}
+
+bool
 default_profile_before_prologue (void)
 {
 #ifdef PROFILE_BEFORE_PROLOGUE
--- a/src/gcc/targhooks.h
+++ b/src/gcc/targhooks.h
@@ -177,6 +177,11 @@
 extern int default_register_move_cost (enum machine_mode, reg_class_t,
 				       reg_class_t);
 
+extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
+						    unsigned int,
+						    enum by_pieces_operation,
+						    bool);
+
 extern bool default_profile_before_prologue (void);
 extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
 extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
--- a/src/gcc/cppbuiltin.c
+++ b/src/gcc/cppbuiltin.c
@@ -53,18 +53,41 @@
     *patchlevel = s_patchlevel;
 }
 
+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]"
+   to create Linaro release number YYYYMM and spin version.  */
+static void
+parse_linarover (int *release, int *spin)
+{
+  static int s_year = -1, s_month, s_spin;
 
+  if (s_year == -1)
+    if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3)
+      {
+	sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month);
+	s_spin = 0;
+      }
+
+  if (release)
+    *release = s_year * 100 + s_month;
+
+  if (spin)
+    *spin = s_spin;
+}
+
 /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__.  */
 static void
 define__GNUC__ (cpp_reader *pfile)
 {
-  int major, minor, patchlevel;
+  int major, minor, patchlevel, linaro_release, linaro_spin;
 
   parse_basever (&major, &minor, &patchlevel);
+  parse_linarover (&linaro_release, &linaro_spin);
   cpp_define_formatted (pfile, "__GNUC__=%d", major);
   cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
   cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
   cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
+  cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release);
+  cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin);
   cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
   cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
   cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
--- a/src/gcc/tree-ssa-threadupdate.c
+++ b/src/gcc/tree-ssa-threadupdate.c
@@ -156,8 +156,9 @@
 		       bool registering)
 {
   fprintf (dump_file,
-	   "  %s jump thread: (%d, %d) incoming edge; ",
+	   "  %s%s jump thread: (%d, %d) incoming edge; ",
 	   (registering ? "Registering" : "Cancelling"),
+	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 	   path[0]->e->src->index, path[0]->e->dest->index);
 
   for (unsigned int i = 1; i < path.length (); i++)
@@ -699,6 +700,10 @@
 	  if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
 	    EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count;
 
+	  /* If we redirect a loop latch edge cancel its loop.  */
+	  if (e->src == e->src->loop_father->latch)
+	    mark_loop_for_removal (e->src->loop_father);
+
 	  /* Redirect the incoming edge (possibly to the joiner block) to the
 	     appropriate duplicate block.  */
 	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
@@ -779,7 +784,6 @@
   edge e, e2;
   edge_iterator ei;
   ssa_local_info_t local_info;
-  struct loop *loop = bb->loop_father;
 
   /* To avoid scanning a linear array for the element we need we instead
      use a hash table.  For normal code there should be no noticeable
@@ -787,32 +791,6 @@
      incoming and outgoing edges such linear searches can get expensive.  */
   redirection_data.create (EDGE_COUNT (bb->succs));
 
-  /* If we thread the latch of the loop to its exit, the loop ceases to
-     exist.  Make sure we do not restrict ourselves in order to preserve
-     this loop.  */
-  if (loop->header == bb)
-    {
-      e = loop_latch_edge (loop);
-      vec<jump_thread_edge *> *path = THREAD_PATH (e);
-
-      if (path
-	  && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
-	      || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
-	{
-	  for (unsigned int i = 1; i < path->length (); i++)
-	    {
-	      edge e2 = (*path)[i]->e;
-
-	      if (loop_exit_edge_p (loop, e2))
-		{
-		  loop->header = NULL;
-		  loop->latch = NULL;
-		  loops_state_set (LOOPS_NEED_FIXUP);
-		}
-	    }
-	}
-    }
-
   /* Record each unique threaded destination into a hash table for
      efficient lookups.  */
   FOR_EACH_EDGE (e, ei, bb->preds)
@@ -1256,9 +1234,7 @@
     {
       /* If the loop ceased to exist, mark it as such, and thread through its
 	 original header.  */
-      loop->header = NULL;
-      loop->latch = NULL;
-      loops_state_set (LOOPS_NEED_FIXUP);
+      mark_loop_for_removal (loop);
       return thread_block (header, false);
     }
 
@@ -1622,6 +1598,210 @@
   return false;
 }
 
+/* Verify that the REGION is a valid jump thread.  A jump thread is a special
+   case of SEME Single Entry Multiple Exits region in which all nodes in the
+   REGION have exactly one incoming edge.  The only exception is the first block
+   that may not have been connected to the rest of the cfg yet.  */
+
+DEBUG_FUNCTION void
+verify_jump_thread (basic_block *region, unsigned n_region)
+{
+  for (unsigned i = 0; i < n_region; i++)
+    gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
+}
+
+/* Return true when BB is one of the first N items in BBS.  */
+
+static inline bool
+bb_in_bbs (basic_block bb, basic_block *bbs, int n)
+{
+  for (int i = 0; i < n; i++)
+    if (bb == bbs[i])
+      return true;
+
+  return false;
+}
+
+/* Duplicates a jump-thread path of N_REGION basic blocks.
+   The ENTRY edge is redirected to the duplicate of the region.
+
+   Remove the last conditional statement in the last basic block in the REGION,
+   and create a single fallthru edge pointing to the same destination as the
+   EXIT edge.
+
+   The new basic blocks are stored to REGION_COPY in the same order as they had
+   in REGION, provided that REGION_COPY is not NULL.
+
+   Returns false if it is unable to copy the region, true otherwise.  */
+
+static bool
+duplicate_thread_path (edge entry, edge exit,
+		       basic_block *region, unsigned n_region,
+		       basic_block *region_copy)
+{
+  unsigned i;
+  bool free_region_copy = false;
+  struct loop *loop = entry->dest->loop_father;
+  edge exit_copy;
+  edge redirected;
+  int total_freq = 0, entry_freq = 0;
+  gcov_type total_count = 0, entry_count = 0;
+
+  if (!can_copy_bbs_p (region, n_region))
+    return false;
+
+  /* Some sanity checking.  Note that we do not check for all possible
+     missuses of the functions.  I.e. if you ask to copy something weird,
+     it will work, but the state of structures probably will not be
+     correct.  */
+  for (i = 0; i < n_region; i++)
+    {
+      /* We do not handle subloops, i.e. all the blocks must belong to the
+	 same loop.  */
+      if (region[i]->loop_father != loop)
+	return false;
+    }
+
+  initialize_original_copy_tables ();
+
+  set_loop_copy (loop, loop);
+
+  if (!region_copy)
+    {
+      region_copy = XNEWVEC (basic_block, n_region);
+      free_region_copy = true;
+    }
+
+  if (entry->dest->count)
+    {
+      total_count = entry->dest->count;
+      entry_count = entry->count;
+      /* Fix up corner cases, to avoid division by zero or creation of negative
+	 frequencies.  */
+      if (entry_count > total_count)
+	entry_count = total_count;
+    }
+  else
+    {
+      total_freq = entry->dest->frequency;
+      entry_freq = EDGE_FREQUENCY (entry);
+      /* Fix up corner cases, to avoid division by zero or creation of negative
+	 frequencies.  */
+      if (total_freq == 0)
+	total_freq = 1;
+      else if (entry_freq > total_freq)
+	entry_freq = total_freq;
+    }
+
+  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
+	    split_edge_bb_loc (entry), false);
+
+  /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
+     following code ensures that all the edges exiting the jump-thread path are
+     redirected back to the original code: these edges are exceptions
+     invalidating the property that is propagated by executing all the blocks of
+     the jump-thread path in order.  */
+
+  for (i = 0; i < n_region; i++)
+    {
+      edge e;
+      edge_iterator ei;
+      basic_block bb = region_copy[i];
+
+      if (single_succ_p (bb))
+	{
+	  /* Make sure the successor is the next node in the path.  */
+	  gcc_assert (i + 1 == n_region
+		      || region_copy[i + 1] == single_succ_edge (bb)->dest);
+	  continue;
+	}
+
+      /* Special case the last block on the path: make sure that it does not
+	 jump back on the copied path.  */
+      if (i + 1 == n_region)
+	{
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    if (bb_in_bbs (e->dest, region_copy, n_region - 1))
+	      {
+		basic_block orig = get_bb_original (e->dest);
+		if (orig)
+		  redirect_edge_and_branch_force (e, orig);
+	      }
+	  continue;
+	}
+
+      /* Redirect all other edges jumping to non-adjacent blocks back to the
+	 original code.  */
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	if (region_copy[i + 1] != e->dest)
+	  {
+	    basic_block orig = get_bb_original (e->dest);
+	    if (orig)
+	      redirect_edge_and_branch_force (e, orig);
+	  }
+    }
+
+  if (total_count)
+    {
+      scale_bbs_frequencies_gcov_type (region, n_region,
+				       total_count - entry_count,
+				       total_count);
+      scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
+				       total_count);
+    }
+  else
+    {
+      scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
+				 total_freq);
+      scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
+    }
+
+#ifdef ENABLE_CHECKING
+  verify_jump_thread (region_copy, n_region);
+#endif
+
+  /* Remove the last branch in the jump thread path.  */
+  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
+  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
+
+  if (e) {
+    rescan_loop_exit (e, true, false);
+    e->probability = REG_BR_PROB_BASE;
+    e->count = region_copy[n_region - 1]->count;
+  }
+
+  /* Redirect the entry and add the phi node arguments.  */
+  if (entry->dest == loop->header)
+    mark_loop_for_removal (loop);
+  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
+  gcc_assert (redirected != NULL);
+  flush_pending_stmts (entry);
+
+  /* Add the other PHI node arguments.  */
+  add_phi_args_after_copy (region_copy, n_region, NULL);
+
+  if (free_region_copy)
+    free (region_copy);
+
+  free_original_copy_tables ();
+  return true;
+}
+
+/* Return true when PATH is a valid jump-thread path.  */
+
+static bool
+valid_jump_thread_path (vec<jump_thread_edge *> *path)
+{
+  unsigned len = path->length ();
+
+  /* Check that the path is connected.  */
+  for (unsigned int j = 0; j < len - 1; j++)
+    if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
+      return false;
+
+  return true;
+}
+
 /* Walk through all blocks and thread incoming edges to the appropriate
    outgoing edge for each edge pair recorded in THREADED_EDGES.
 
@@ -1651,6 +1831,70 @@
   threaded_blocks = BITMAP_ALLOC (NULL);
   memset (&thread_stats, 0, sizeof (thread_stats));
 
+  /* Jump-thread all FSM threads before other jump-threads.  */
+  for (i = 0; i < paths.length ();)
+    {
+      vec<jump_thread_edge *> *path = paths[i];
+      edge entry = (*path)[0]->e;
+
+      /* Only code-generate FSM jump-threads in this loop.  */
+      if ((*path)[0]->type != EDGE_FSM_THREAD)
+	{
+	  i++;
+	  continue;
+	}
+
+      /* Do not jump-thread twice from the same block.  */
+      if (bitmap_bit_p (threaded_blocks, entry->src->index)
+	  /* Verify that the jump thread path is still valid: a
+	     previous jump-thread may have changed the CFG, and
+	     invalidated the current path.  */
+	  || !valid_jump_thread_path (path))
+	{
+	  /* Remove invalid FSM jump-thread paths.  */
+	  delete_jump_thread_path (path);
+	  paths.unordered_remove (i);
+	  continue;
+	}
+
+      unsigned len = path->length ();
+      edge exit = (*path)[len - 1]->e;
+      basic_block *region = XNEWVEC (basic_block, len - 1);
+
+      for (unsigned int j = 0; j < len - 1; j++)
+	region[j] = (*path)[j]->e->dest;
+
+      if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
+	{
+	  /* We do not update dominance info.  */
+	  free_dominance_info (CDI_DOMINATORS);
+	  bitmap_set_bit (threaded_blocks, entry->src->index);
+	  retval = true;
+	}
+
+      delete_jump_thread_path (path);
+      paths.unordered_remove (i);
+    }
+
+  /* Remove from PATHS all the jump-threads starting with an edge already
+     jump-threaded.  */
+  for (i = 0; i < paths.length ();)
+    {
+      vec<jump_thread_edge *> *path = paths[i];
+      edge entry = (*path)[0]->e;
+
+      /* Do not jump-thread twice from the same block.  */
+      if (bitmap_bit_p (threaded_blocks, entry->src->index))
+	{
+	  delete_jump_thread_path (path);
+	  paths.unordered_remove (i);
+	}
+      else
+	i++;
+    }
+
+  bitmap_clear (threaded_blocks);
+
   mark_threaded_blocks (threaded_blocks);
 
   initialize_original_copy_tables ();
@@ -1736,16 +1980,8 @@
 		/* Our path is still valid, thread it.  */
 	        if (e->aux)
 		  {
-		    struct loop *loop = (*path)[0]->e->dest->loop_father;
-
 		    if (thread_block ((*path)[0]->e->dest, false))
-		      {
-			/* This jump thread likely totally scrambled this loop.
-			   So arrange for it to be fixed up.  */
-			loop->header = NULL;
-			loop->latch = NULL;
-			e->aux = NULL;
-		      }
+		      e->aux = NULL;
 		    else
 		      {
 		        delete_jump_thread_path (path);
--- a/src/gcc/tree-ssa-threadupdate.h
+++ b/src/gcc/tree-ssa-threadupdate.h
@@ -26,6 +26,7 @@
 enum jump_thread_edge_type
 {
   EDGE_START_JUMP_THREAD,
+  EDGE_FSM_THREAD,
   EDGE_COPY_SRC_BLOCK,
   EDGE_COPY_SRC_JOINER_BLOCK,
   EDGE_NO_COPY_SRC_BLOCK
--- a/src/gcc/tree-pretty-print.c
+++ b/src/gcc/tree-pretty-print.c
@@ -1820,7 +1820,6 @@
     case RSHIFT_EXPR:
     case LROTATE_EXPR:
     case RROTATE_EXPR:
-    case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
     case WIDEN_LSHIFT_EXPR:
     case BIT_IOR_EXPR:
@@ -2977,7 +2976,6 @@
     case REDUC_MAX_EXPR:
     case REDUC_MIN_EXPR:
     case REDUC_PLUS_EXPR:
-    case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
     case VEC_UNPACK_HI_EXPR:
     case VEC_UNPACK_LO_EXPR:
@@ -3088,9 +3086,6 @@
     case RROTATE_EXPR:
       return "r>>";
 
-    case VEC_LSHIFT_EXPR:
-      return "v<<";
-
     case VEC_RSHIFT_EXPR:
       return "v>>";
 
--- a/src/gcc/c-family/ChangeLog.linaro
+++ b/src/gcc/c-family/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/java/ChangeLog.linaro
+++ b/src/gcc/java/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/c/c-parser.c
+++ b/src/gcc/c/c-parser.c
@@ -4210,7 +4210,8 @@
 		  init.original_type = NULL;
 		  c_parser_error (parser, "expected identifier");
 		  c_parser_skip_until_found (parser, CPP_COMMA, NULL);
-		  process_init_element (init, false, braced_init_obstack);
+		  process_init_element (input_location, init, false,
+					braced_init_obstack);
 		  return;
 		}
 	    }
@@ -4342,7 +4343,8 @@
 		  init.original_type = NULL;
 		  c_parser_error (parser, "expected %<=%>");
 		  c_parser_skip_until_found (parser, CPP_COMMA, NULL);
-		  process_init_element (init, false, braced_init_obstack);
+		  process_init_element (input_location, init, false,
+					braced_init_obstack);
 		  return;
 		}
 	    }
@@ -4363,11 +4365,12 @@
 {
   struct c_expr init;
   gcc_assert (!after || c_dialect_objc ());
+  location_t loc = c_parser_peek_token (parser)->location;
+
   if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after)
     init = c_parser_braced_init (parser, NULL_TREE, true);
   else
     {
-      location_t loc = c_parser_peek_token (parser)->location;
       init = c_parser_expr_no_commas (parser, after);
       if (init.value != NULL_TREE
 	  && TREE_CODE (init.value) != STRING_CST
@@ -4374,7 +4377,7 @@
 	  && TREE_CODE (init.value) != COMPOUND_LITERAL_EXPR)
 	init = convert_lvalue_to_rvalue (loc, init, true, true);
     }
-  process_init_element (init, false, braced_init_obstack);
+  process_init_element (loc, init, false, braced_init_obstack);
 }
 
 /* Parse a compound statement (possibly a function body) (C90 6.6.2,
--- a/src/gcc/c/c-typeck.c
+++ b/src/gcc/c/c-typeck.c
@@ -102,8 +102,8 @@
 static char *print_spelling (char *);
 static void warning_init (int, const char *);
 static tree digest_init (location_t, tree, tree, tree, bool, bool, int);
-static void output_init_element (tree, tree, bool, tree, tree, int, bool,
-				 struct obstack *);
+static void output_init_element (location_t, tree, tree, bool, tree, tree, int,
+				 bool, struct obstack *);
 static void output_pending_init_elements (int, struct obstack *);
 static int set_designator (int, struct obstack *);
 static void push_range_stack (tree, struct obstack *);
@@ -7188,13 +7188,15 @@
 	  if ((TREE_CODE (constructor_type) == RECORD_TYPE
 	       || TREE_CODE (constructor_type) == UNION_TYPE)
 	      && constructor_fields == 0)
-	    process_init_element (pop_init_level (1, braced_init_obstack),
+	    process_init_element (input_location,
+				  pop_init_level (1, braced_init_obstack),
 				  true, braced_init_obstack);
 	  else if (TREE_CODE (constructor_type) == ARRAY_TYPE
 		   && constructor_max_index
 		   && tree_int_cst_lt (constructor_max_index,
 				       constructor_index))
-	    process_init_element (pop_init_level (1, braced_init_obstack),
+	    process_init_element (input_location,
+				  pop_init_level (1, braced_init_obstack),
 				  true, braced_init_obstack);
 	  else
 	    break;
@@ -7394,10 +7396,9 @@
       /* When we come to an explicit close brace,
 	 pop any inner levels that didn't have explicit braces.  */
       while (constructor_stack->implicit)
-	{
-	  process_init_element (pop_init_level (1, braced_init_obstack),
-				true, braced_init_obstack);
-	}
+	process_init_element (input_location,
+			      pop_init_level (1, braced_init_obstack),
+			      true, braced_init_obstack);
       gcc_assert (!constructor_range_stack);
     }
 
@@ -7575,10 +7576,9 @@
       /* Designator list starts at the level of closest explicit
 	 braces.  */
       while (constructor_stack->implicit)
-	{
-	  process_init_element (pop_init_level (1, braced_init_obstack),
-				true, braced_init_obstack);
-	}
+	process_init_element (input_location,
+			      pop_init_level (1, braced_init_obstack),
+			      true, braced_init_obstack);
       constructor_designated = 1;
       return 0;
     }
@@ -8198,9 +8198,9 @@
    existing initializer.  */
 
 static void
-output_init_element (tree value, tree origtype, bool strict_string, tree type,
-		     tree field, int pending, bool implicit,
-		     struct obstack * braced_init_obstack)
+output_init_element (location_t loc, tree value, tree origtype,
+		     bool strict_string, tree type, tree field, int pending,
+		     bool implicit, struct obstack * braced_init_obstack)
 {
   tree semantic_type = NULL_TREE;
   bool maybe_const = true;
@@ -8298,8 +8298,8 @@
 
   if (semantic_type)
     value = build1 (EXCESS_PRECISION_EXPR, semantic_type, value);
-  value = digest_init (input_location, type, value, origtype, npc,
-      		       strict_string, require_constant_value);
+  value = digest_init (loc, type, value, origtype, npc, strict_string,
+		       require_constant_value);
   if (value == error_mark_node)
     {
       constructor_erroneous = 1;
@@ -8426,8 +8426,8 @@
 	{
 	  if (tree_int_cst_equal (elt->purpose,
 				  constructor_unfilled_index))
-	    output_init_element (elt->value, elt->origtype, true,
-				 TREE_TYPE (constructor_type),
+	    output_init_element (input_location, elt->value, elt->origtype,
+				 true, TREE_TYPE (constructor_type),
 				 constructor_unfilled_index, 0, false,
 				 braced_init_obstack);
 	  else if (tree_int_cst_lt (constructor_unfilled_index,
@@ -8481,8 +8481,8 @@
 	  if (tree_int_cst_equal (elt_bitpos, ctor_unfilled_bitpos))
 	    {
 	      constructor_unfilled_fields = elt->purpose;
-	      output_init_element (elt->value, elt->origtype, true,
-				   TREE_TYPE (elt->purpose),
+	      output_init_element (input_location, elt->value, elt->origtype,
+				   true, TREE_TYPE (elt->purpose),
 				   elt->purpose, 0, false,
 				   braced_init_obstack);
 	    }
@@ -8555,7 +8555,7 @@
    existing initializer.  */
 
 void
-process_init_element (struct c_expr value, bool implicit,
+process_init_element (location_t loc, struct c_expr value, bool implicit,
 		      struct obstack * braced_init_obstack)
 {
   tree orig_value = value.value;
@@ -8599,7 +8599,7 @@
       if ((TREE_CODE (constructor_type) == RECORD_TYPE
 	   || TREE_CODE (constructor_type) == UNION_TYPE)
 	  && constructor_fields == 0)
-	process_init_element (pop_init_level (1, braced_init_obstack),
+	process_init_element (loc, pop_init_level (1, braced_init_obstack),
 			      true, braced_init_obstack);
       else if ((TREE_CODE (constructor_type) == ARRAY_TYPE
 	        || TREE_CODE (constructor_type) == VECTOR_TYPE)
@@ -8606,7 +8606,7 @@
 	       && constructor_max_index
 	       && tree_int_cst_lt (constructor_max_index,
 				   constructor_index))
-	process_init_element (pop_init_level (1, braced_init_obstack),
+	process_init_element (loc, pop_init_level (1, braced_init_obstack),
 			      true, braced_init_obstack);
       else
 	break;
@@ -8684,7 +8684,7 @@
 	  if (value.value)
 	    {
 	      push_member_name (constructor_fields);
-	      output_init_element (value.value, value.original_type,
+	      output_init_element (loc, value.value, value.original_type,
 				   strict_string, fieldtype,
 				   constructor_fields, 1, implicit,
 				   braced_init_obstack);
@@ -8776,7 +8776,7 @@
 	  if (value.value)
 	    {
 	      push_member_name (constructor_fields);
-	      output_init_element (value.value, value.original_type,
+	      output_init_element (loc, value.value, value.original_type,
 				   strict_string, fieldtype,
 				   constructor_fields, 1, implicit,
 				   braced_init_obstack);
@@ -8828,7 +8828,7 @@
 	  if (value.value)
 	    {
 	      push_array_bounds (tree_to_uhwi (constructor_index));
-	      output_init_element (value.value, value.original_type,
+	      output_init_element (loc, value.value, value.original_type,
 				   strict_string, elttype,
 				   constructor_index, 1, implicit,
 				   braced_init_obstack);
@@ -8863,7 +8863,7 @@
 	    {
 	      if (TREE_CODE (value.value) == VECTOR_CST)
 		elttype = TYPE_MAIN_VARIANT (constructor_type);
-	      output_init_element (value.value, value.original_type,
+	      output_init_element (loc, value.value, value.original_type,
 				   strict_string, elttype,
 				   constructor_index, 1, implicit,
 				   braced_init_obstack);
@@ -8892,7 +8892,7 @@
       else
 	{
 	  if (value.value)
-	    output_init_element (value.value, value.original_type,
+	    output_init_element (loc, value.value, value.original_type,
 				 strict_string, constructor_type,
 				 NULL_TREE, 1, implicit,
 				 braced_init_obstack);
@@ -8911,8 +8911,8 @@
 	  while (constructor_stack != range_stack->stack)
 	    {
 	      gcc_assert (constructor_stack->implicit);
-	      process_init_element (pop_init_level (1,
-						    braced_init_obstack),
+	      process_init_element (loc,
+				    pop_init_level (1, braced_init_obstack),
 				    true, braced_init_obstack);
 	    }
 	  for (p = range_stack;
@@ -8920,7 +8920,8 @@
 	       p = p->prev)
 	    {
 	      gcc_assert (constructor_stack->implicit);
-	      process_init_element (pop_init_level (1, braced_init_obstack),
+	      process_init_element (loc,
+				    pop_init_level (1, braced_init_obstack),
 				    true, braced_init_obstack);
 	    }
 
--- a/src/gcc/c/c-tree.h
+++ b/src/gcc/c/c-tree.h
@@ -612,7 +612,8 @@
 extern struct c_expr pop_init_level (int, struct obstack *);
 extern void set_init_index (tree, tree, struct obstack *);
 extern void set_init_label (tree, struct obstack *);
-extern void process_init_element (struct c_expr, bool, struct obstack *);
+extern void process_init_element (location_t, struct c_expr, bool,
+				  struct obstack *);
 extern tree build_compound_literal (location_t, tree, tree, bool);
 extern void check_compound_literal_type (location_t, struct c_type_name *);
 extern tree c_start_case (location_t, location_t, tree);
--- a/src/gcc/c/ChangeLog.linaro
+++ b/src/gcc/c/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/target.def
+++ b/src/gcc/target.def
@@ -1048,11 +1048,13 @@
 
 DEFHOOK
 (macro_fusion_pair_p,
- "This hook is used to check whether two insns could be macro fused for\n\
-target microarchitecture. If this hook returns true for the given insn pair\n\
-(@var{condgen} and @var{condjmp}), scheduler will put them into a sched\n\
-group, and they will not be scheduled apart.",
- bool, (rtx condgen, rtx condjmp), NULL)
+ "This hook is used to check whether two insns should be macro fused for\n\
+a target microarchitecture. If this hook returns true for the given insn pair\n\
+(@var{prev} and @var{curr}), the scheduler will put them into a sched\n\
+group, and they will not be scheduled apart.  The two insns will be either\n\
+two SET insns or a compare and a conditional jump and this hook should\n\
+validate any dependencies needed to fuse the two insns together.",
+ bool, (rtx prev, rtx curr), NULL)
 
 /* The following member value is a pointer to a function called
    after evaluation forward dependencies of insns in chain given
@@ -1174,11 +1176,17 @@
  "\n\
 This hook controls what insns from the ready insn queue will be\n\
 considered for the multipass insn scheduling.  If the hook returns\n\
-zero for @var{insn}, the insn will be not chosen to\n\
-be issued.\n\
+zero for @var{insn}, the insn will be considered in multipass scheduling.\n\
+Positive return values will remove @var{insn} from consideration on\n\
+the current round of multipass scheduling.\n\
+Negative return values will remove @var{insn} from consideration for given\n\
+number of cycles.\n\
+Backends should be careful about returning non-zero for highest priority\n\
+instruction at position 0 in the ready list.  @var{ready_index} is passed\n\
+to allow backends make correct judgements.\n\
 \n\
 The default is that any ready insns can be chosen to be issued.",
- int, (rtx insn), NULL)
+ int, (rtx insn, int ready_index), NULL)
 
 /* This hook prepares the target for a new round of multipass
    scheduling.
@@ -1193,7 +1201,7 @@
 (first_cycle_multipass_begin,
  "This hook prepares the target backend for a new round of multipass\n\
 scheduling.",
- void, (void *data, char *ready_try, int n_ready, bool first_cycle_insn_p),
+ void, (void *data, signed char *ready_try, int n_ready, bool first_cycle_insn_p),
  NULL)
 
 /* This hook is called when multipass scheduling evaluates instruction INSN.
@@ -1209,7 +1217,7 @@
 DEFHOOK
 (first_cycle_multipass_issue,
  "This hook is called when multipass scheduling evaluates instruction INSN.",
- void, (void *data, char *ready_try, int n_ready, rtx insn,
+ void, (void *data, signed char *ready_try, int n_ready, rtx insn,
 	const void *prev_data), NULL)
 
 /* This hook is called when multipass scheduling backtracks from evaluation of
@@ -1225,7 +1233,7 @@
 (first_cycle_multipass_backtrack,
  "This is called when multipass scheduling backtracks from evaluation of\n\
 an instruction.",
- void, (const void *data, char *ready_try, int n_ready), NULL)
+ void, (const void *data, signed char *ready_try, int n_ready), NULL)
 
 /* This hook notifies the target about the result of the concluded current
    round of multipass scheduling.
@@ -1421,26 +1429,6 @@
 @var{insn} should be generated.  In this case @var{label} can't be null.",
  rtx, (rtx insn, rtx label, unsigned int ds), NULL)
 
-/* The following member value is a pointer to a function controlling
-   what insns from the ready insn queue will be considered for the
-   multipass insn scheduling.  If the hook returns zero for the insn
-   passed as the parameter, the insn will not be chosen to be
-   issued.  This hook is used to discard speculative instructions,
-   that stand at the first position of the ready list.  */
-DEFHOOK
-(first_cycle_multipass_dfa_lookahead_guard_spec,
- "This hook is used as a workaround for\n\
-@samp{TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD} not being\n\
-called on the first instruction of the ready list.  The hook is used to\n\
-discard speculative instructions that stand first in the ready list from\n\
-being scheduled on the current cycle.  If the hook returns @code{false},\n\
-@var{insn} will not be chosen to be issued.\n\
-For non-speculative instructions,\n\
-the hook should always return @code{true}.  For example, in the ia64 backend\n\
-the hook is used to cancel data speculative insns when the ALAT table\n\
-is nearly full.",
- bool, (const_rtx insn), NULL)
-
 /* The following member value is a pointer to a function that provides
    information about the speculation capabilities of the target.
    The parameter is a pointer to spec_info variable.  */
@@ -3039,6 +3027,43 @@
  int, (enum machine_mode mode, reg_class_t rclass, bool in),
  default_memory_move_cost)
 
+DEFHOOK
+(use_by_pieces_infrastructure_p,
+ "GCC will attempt several strategies when asked to copy between\n\
+two areas of memory, or to set, clear or store to memory, for example\n\
+when copying a @code{struct}. The @code{by_pieces} infrastructure\n\
+implements such memory operations as a sequence of load, store or move\n\
+insns.  Alternate strategies are to expand the\n\
+@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit\n\
+unit-by-unit, loop-based operations.\n\
+\n\
+This target hook should return true if, for a memory operation with a\n\
+given @var{size} and @var{alignment}, using the @code{by_pieces}\n\
+infrastructure is expected to result in better code generation.\n\
+Both @var{size} and @var{alignment} are measured in terms of storage\n\
+units.\n\
+\n\
+The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\
+@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\
+These describe the type of memory operation under consideration.\n\
+\n\
+The parameter @var{speed_p} is true if the code is currently being\n\
+optimized for speed rather than size.\n\
+\n\
+Returning true for higher values of @var{size} can improve code generation\n\
+for speed if the target does not provide an implementation of the\n\
+@code{movmem} or @code{setmem} standard names, if the @code{movmem} or\n\
+@code{setmem} implementation would be more expensive than a sequence of\n\
+insns, or if the overhead of a library call would dominate that of\n\
+the body of the memory operation.\n\
+\n\
+Returning true for higher values of @code{size} may also cause an increase\n\
+in code size, for example where the number of insns emitted to perform a\n\
+move would be greater than that of a library call.",
+ bool, (unsigned HOST_WIDE_INT size, unsigned int alignment,
+        enum by_pieces_operation op, bool speed_p),
+ default_use_by_pieces_infrastructure_p)
+
 /* True for MODE if the target expects that registers in this mode will
    be allocated to registers in a small register class.  The compiler is
    allowed to use registers explicitly used in the rtl as spill registers
--- a/src/gcc/optabs.c
+++ b/src/gcc/optabs.c
@@ -483,17 +483,16 @@
       return fma_optab;
 
     case REDUC_MAX_EXPR:
-      return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab;
+      return TYPE_UNSIGNED (type)
+	     ? reduc_umax_scal_optab : reduc_smax_scal_optab;
 
     case REDUC_MIN_EXPR:
-      return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
+      return TYPE_UNSIGNED (type)
+	     ? reduc_umin_scal_optab : reduc_smin_scal_optab;
 
     case REDUC_PLUS_EXPR:
-      return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
+      return reduc_plus_scal_optab;
 
-    case VEC_LSHIFT_EXPR:
-      return vec_shl_optab;
-
     case VEC_RSHIFT_EXPR:
       return vec_shr_optab;
 
@@ -585,8 +584,27 @@
       return unknown_optab;
     }
 }
-
 
+/* Given optab UNOPTAB that reduces a vector to a scalar, find instead the old
+   optab that produces a vector with the reduction result in one element,
+   for a tree with type TYPE.  */
+
+optab
+scalar_reduc_to_vector (optab unoptab, const_tree type)
+{
+  switch (unoptab)
+    {
+    case reduc_plus_scal_optab:
+      return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
+
+    case reduc_smin_scal_optab: return reduc_smin_optab;
+    case reduc_umin_scal_optab: return reduc_umin_optab;
+    case reduc_smax_scal_optab: return reduc_smax_optab;
+    case reduc_umax_scal_optab: return reduc_umax_optab;
+    default: return unknown_optab;
+    }
+}
+
 /* Expand vector widening operations.
 
    There are two different classes of operations handled here:
@@ -726,7 +744,7 @@
   return true;
 }
 
-/* Generate insns for VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR.  */
+/* Generate insns for VEC_RSHIFT_EXPR.  */
 
 rtx
 expand_vec_shift_expr (sepops ops, rtx target)
@@ -737,21 +755,10 @@
   enum machine_mode mode = TYPE_MODE (ops->type);
   tree vec_oprnd = ops->op0;
   tree shift_oprnd = ops->op1;
-  optab shift_optab;
 
-  switch (ops->code)
-    {
-      case VEC_RSHIFT_EXPR:
-	shift_optab = vec_shr_optab;
-	break;
-      case VEC_LSHIFT_EXPR:
-	shift_optab = vec_shl_optab;
-	break;
-      default:
-	gcc_unreachable ();
-    }
+  gcc_assert (ops->code == VEC_RSHIFT_EXPR);
 
-  icode = optab_handler (shift_optab, mode);
+  icode = optab_handler (vec_shr_optab, mode);
   gcc_assert (icode != CODE_FOR_nothing);
 
   rtx_op1 = expand_normal (vec_oprnd);
@@ -4234,7 +4241,7 @@
 	    y = const0_rtx;
 	}
 
-      *pmode = word_mode;
+      *pmode = ret_mode;
       prepare_cmp_insn (x, y, comparison, NULL_RTX, unsignedp, methods,
 			ptest, pmode);
     }
--- a/src/gcc/optabs.h
+++ b/src/gcc/optabs.h
@@ -160,6 +160,11 @@
    vector shifts and rotates */
 extern optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype);
 
+/* Given an optab that reduces a vector to a scalar, find instead the old
+   optab that produces a vector with the reduction result in one element,
+   for a tree with the specified type.  */
+extern optab scalar_reduc_to_vector (optab, const_tree type);
+
 /* The various uses that a comparison can have; used by can_compare_p:
    jumps, conditional moves, store flag operations.  */
 enum can_compare_purpose
@@ -233,7 +238,7 @@
 
 /* Generate code for VEC_COND_EXPR.  */
 extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR.  */
+/* Generate code for VEC_RSHIFT_EXPR.  */
 extern rtx expand_vec_shift_expr (sepops, rtx);
 
 /* Return true if target supports vector operations for VEC_PERM_EXPR.  */
--- a/src/gcc/defaults.h
+++ b/src/gcc/defaults.h
@@ -914,14 +914,6 @@
 #define PREFERRED_DEBUGGING_TYPE NO_DEBUG
 #endif
 
-#ifndef LARGEST_EXPONENT_IS_NORMAL
-#define LARGEST_EXPONENT_IS_NORMAL(SIZE) 0
-#endif
-
-#ifndef ROUND_TOWARDS_ZERO
-#define ROUND_TOWARDS_ZERO 0
-#endif
-
 #ifndef FLOAT_LIB_COMPARE_RETURNS_BOOL
 #define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) false
 #endif
@@ -1065,6 +1057,15 @@
 #define MOVE_MAX_PIECES   MOVE_MAX
 #endif
 
+/* STORE_MAX_PIECES is the number of bytes at a time that we can
+   store efficiently.  Due to internal GCC limitations, this is
+   MOVE_MAX_PIECES limited by the number of bytes GCC can represent
+   for an immediate constant.  */
+
+#ifndef STORE_MAX_PIECES
+#define STORE_MAX_PIECES  MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
+#endif
+
 #ifndef MAX_MOVE_MAX
 #define MAX_MOVE_MAX MOVE_MAX
 #endif
--- a/src/gcc/optabs.def
+++ b/src/gcc/optabs.def
@@ -243,6 +243,13 @@
 OPTAB_D (sincos_optab, "sincos$a3")
 OPTAB_D (tan_optab, "tan$a2")
 
+/* Vector reduction to a scalar.  */
+OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
+OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
+OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
+OPTAB_D (reduc_umax_scal_optab, "reduc_umax_scal_$a")
+OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
+/* (Old) Vector reduction, returning a vector with the result in one lane.  */
 OPTAB_D (reduc_smax_optab, "reduc_smax_$a")
 OPTAB_D (reduc_smin_optab, "reduc_smin_$a")
 OPTAB_D (reduc_splus_optab, "reduc_splus_$a")
@@ -249,6 +256,7 @@
 OPTAB_D (reduc_umax_optab, "reduc_umax_$a")
 OPTAB_D (reduc_umin_optab, "reduc_umin_$a")
 OPTAB_D (reduc_uplus_optab, "reduc_uplus_$a")
+
 OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
 OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
 OPTAB_D (udot_prod_optab, "udot_prod$I$a")
@@ -266,7 +274,6 @@
 OPTAB_D (vec_perm_optab, "vec_perm$a")
 OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a")
 OPTAB_D (vec_set_optab, "vec_set$a")
-OPTAB_D (vec_shl_optab, "vec_shl_$a")
 OPTAB_D (vec_shr_optab, "vec_shr_$a")
 OPTAB_D (vec_unpacks_float_hi_optab, "vec_unpacks_float_hi_$a")
 OPTAB_D (vec_unpacks_float_lo_optab, "vec_unpacks_float_lo_$a")
--- a/src/gcc/target.h
+++ b/src/gcc/target.h
@@ -78,6 +78,17 @@
   SWITCH_TYPE_LINE_END		/* Please emit a line terminator.  */
 };
 
+/* Types of memory operation understood by the "by_pieces" infrastructure.
+   Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook.  */
+
+enum by_pieces_operation
+{
+  CLEAR_BY_PIECES,
+  MOVE_BY_PIECES,
+  SET_BY_PIECES,
+  STORE_BY_PIECES
+};
+
 typedef int (* print_switch_fn_type) (print_switch_type, const char *);
 
 /* An example implementation for ELF targets.  Defined in varasm.c  */
--- a/src/gcc/rtlanal.c
+++ b/src/gcc/rtlanal.c
@@ -5606,7 +5606,8 @@
     inner = strip_address_mutations (&XEXP (*inner, 0));
   if (REG_P (*inner)
       || MEM_P (*inner)
-      || GET_CODE (*inner) == SUBREG)
+      || GET_CODE (*inner) == SUBREG
+      || GET_CODE (*inner) == SCRATCH)
     return inner;
   return 0;
 }
--- a/src/gcc/configure
+++ b/src/gcc/configure
@@ -1695,7 +1695,8 @@
                           use sysroot as the system root during the build
   --with-sysroot[=DIR]    search for usr/lib, usr/include, et al, within DIR
   --with-specs=SPECS      add SPECS to driver command-line processing
-  --with-pkgversion=PKG   Use PKG in the version string in place of "GCC"
+  --with-pkgversion=PKG   Use PKG in the version string in place of "Linaro
+                          GCC `cat $srcdir/LINARO-VERSION`"
   --with-bugurl=URL       Direct users to URL to report a bug
   --with-multilib-list    select multilibs (AArch64, SH and x86-64 only)
   --with-gnu-ld           assume the C compiler uses GNU ld default=no
@@ -7240,7 +7241,7 @@
       *)   PKGVERSION="($withval) " ;;
      esac
 else
-  PKGVERSION="(GCC) "
+  PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
 
 fi
 
@@ -17945,7 +17946,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17948 "configure"
+#line 17949 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -18051,7 +18052,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 18054 "configure"
+#line 18055 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
--- a/src/gcc/lra-eliminations.c
+++ b/src/gcc/lra-eliminations.c
@@ -1164,7 +1164,9 @@
 		     ep->from, ep->to);
 	  /* If after processing RTL we decides that SP can be used as
 	     a result of elimination, it can not be changed.  */
-	  gcc_assert (ep->to_rtx != stack_pointer_rtx);
+	  gcc_assert ((ep->to_rtx != stack_pointer_rtx)
+		      || (ep->from < FIRST_PSEUDO_REGISTER
+			  && fixed_regs [ep->from]));
 	  /* Mark that is not eliminable anymore.  */
 	  elimination_map[ep->from] = NULL;
 	  for (ep1 = ep + 1; ep1 < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep1++)
--- a/src/gcc/cfghooks.c
+++ b/src/gcc/cfghooks.c
@@ -569,14 +569,10 @@
       struct loop *loop = bb->loop_father;
 
       /* If we remove the header or the latch of a loop, mark the loop for
-	 removal by setting its header and latch to NULL.  */
+	 removal.  */
       if (loop->latch == bb
 	  || loop->header == bb)
-	{
-	  loop->header = NULL;
-	  loop->latch = NULL;
-	  loops_state_set (LOOPS_NEED_FIXUP);
-	}
+	mark_loop_for_removal (loop);
 
       remove_bb_from_loops (bb);
     }
@@ -760,11 +756,7 @@
 	  /* ... we merge two loop headers, in which case we kill
 	     the inner loop.  */
 	  if (b->loop_father->header == b)
-	    {
-	      b->loop_father->header = NULL;
-	      b->loop_father->latch = NULL;
-	      loops_state_set (LOOPS_NEED_FIXUP);
-	    }
+	    mark_loop_for_removal (b->loop_father);
 	}
       /* If we merge a loop header into its predecessor, update the loop
 	 structure.  */
@@ -1103,9 +1095,7 @@
 	  && cloop->header == bb)
 	{
 	  add_bb_to_loop (new_bb, loop_outer (cloop));
-	  cloop->header = NULL;
-	  cloop->latch = NULL;
-	  loops_state_set (LOOPS_NEED_FIXUP);
+	  mark_loop_for_removal (cloop);
 	}
       else
 	{
--- a/src/gcc/fold-const.c
+++ b/src/gcc/fold-const.c
@@ -1446,8 +1446,7 @@
       int count = TYPE_VECTOR_SUBPARTS (type), i;
       tree *elts = XALLOCAVEC (tree, count);
 
-      if (code == VEC_LSHIFT_EXPR
-	  || code == VEC_RSHIFT_EXPR)
+      if (code == VEC_RSHIFT_EXPR)
 	{
 	  if (!tree_fits_uhwi_p (arg2))
 	    return NULL_TREE;
@@ -1459,11 +1458,10 @@
 	  if (shiftc >= outerc || (shiftc % innerc) != 0)
 	    return NULL_TREE;
 	  int offset = shiftc / innerc;
-	  /* The direction of VEC_[LR]SHIFT_EXPR is endian dependent.
-	     For reductions, compiler emits VEC_RSHIFT_EXPR always,
-	     for !BYTES_BIG_ENDIAN picks first vector element, but
-	     for BYTES_BIG_ENDIAN last element from the vector.  */
-	  if ((code == VEC_RSHIFT_EXPR) ^ (!BYTES_BIG_ENDIAN))
+	  /* The direction of VEC_RSHIFT_EXPR is endian dependent.
+	     For reductions, if !BYTES_BIG_ENDIAN then compiler picks first
+	     vector element, but last element if BYTES_BIG_ENDIAN.  */
+	  if (BYTES_BIG_ENDIAN)
 	    offset = -offset;
 	  tree zero = build_zero_cst (TREE_TYPE (type));
 	  for (i = 0; i < count; i++)
@@ -8530,12 +8528,13 @@
     case REDUC_MAX_EXPR:
     case REDUC_PLUS_EXPR:
       {
-	unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	unsigned int nelts, i;
 	tree *elts;
 	enum tree_code subcode;
 
 	if (TREE_CODE (op0) != VECTOR_CST)
 	  return NULL_TREE;
+        nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0));
 
 	elts = XALLOCAVEC (tree, nelts);
 	if (!vec_cst_ctor_to_array (op0, elts))
@@ -8554,10 +8553,9 @@
 	    elts[0] = const_binop (subcode, elts[0], elts[i]);
 	    if (elts[0] == NULL_TREE || !CONSTANT_CLASS_P (elts[0]))
 	      return NULL_TREE;
-	    elts[i] = build_zero_cst (TREE_TYPE (type));
 	  }
 
-	return build_vector (type, elts);
+	return elts[0];
       }
 
     default:
--- a/src/gcc/omp-low.c
+++ b/src/gcc/omp-low.c
@@ -11747,6 +11747,7 @@
      iteration increment and the condition/branch.  */
   basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
   basic_block incr_bb = create_empty_bb (orig_exit);
+  add_bb_to_loop (incr_bb, body_bb->loop_father);
   /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty
      flag.  Set it now to be a FALLTHRU_EDGE.  */
   gcc_assert (EDGE_COUNT (orig_exit->succs) == 1);
@@ -11771,7 +11772,6 @@
   loop->safelen = node->simdclone->simdlen;
   loop->force_vect = true;
   loop->header = body_bb;
-  add_bb_to_loop (incr_bb, loop);
 
   /* Branch around the body if the mask applies.  */
   if (node->simdclone->inbranch)
@@ -11812,7 +11812,7 @@
   gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
   e = split_block (incr_bb, gsi_stmt (gsi));
   basic_block latch_bb = e->dest;
-  basic_block new_exit_bb = e->dest;
+  basic_block new_exit_bb;
   new_exit_bb = split_block (latch_bb, NULL)->dest;
   loop->latch = latch_bb;
 
--- a/src/gcc/objc/ChangeLog.linaro
+++ b/src/gcc/objc/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/ChangeLog.linaro
+++ b/src/gcc/ChangeLog.linaro
@@ -0,0 +1,4385 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+	* LINARO-VERSION: Update.
+
+2015-10-28 Yvan Roux  <yvan.roux@linaro.org>
+	   Sebastian Pop  <s.pop@samsung.com>
+
+	Backport from trunk r221007, r221675, r222011.
+	2015-04-11  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/65735
+	* tree-ssa-threadedge.c (fsm_find_control_statement_thread_paths):
+	Remove visited_phis argument, add visited_bbs, avoid recursing into the
+	same bb rather than just into the same phi node.
+	(thread_through_normal_block): Adjust caller.
+
+	2015-03-25  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65177
+	* tree-ssa-threadupdate.c (verify_seme): Renamed verify_jump_thread.
+	(bb_in_bbs): New.
+	(duplicate_seme_region): Renamed duplicate_thread_path.  Redirect all
+	edges not adjacent on the path to the original code.
+
+	2015-02-26  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65048
+	* tree-ssa-threadupdate.c (valid_jump_thread_path): New.
+	(thread_through_all_blocks): Call valid_jump_thread_path.
+	Remove invalid FSM jump-thread paths.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+	* LINARO-VERSION: Update.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+	* LINARO-VERSION: Update.
+
+2015-06-02  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r217753.
+	2014-11-19  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/63843
+	* simplify-rtx.c (simplify_binary_operation_1) <case ASHIFTRT>: For
+	optimization of ashiftrt of subreg of lshiftrt, check that code
+	is ASHIFTRT.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+	* LINARO-VERSION: Update.
+
+2015-04-15  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r220348.
+	2015-02-02  Tejas Belagod  <tejas.belagod@arm.com>
+	    Andrew Pinski  <pinskia@gcc.gnu.org>
+	    Jakub Jelinek  <jakub@gcc.gnu.org>
+
+	PR target/64231
+	* config/aarch64/aarch64.c (aarch64_classify_symbol): Fix large
+	integer typing for small model. Use IN_RANGE.
+
+2015-04-14  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r220399, r220413.
+
+	2015-02-04  Matthew Wahab  <matthew.wahab@arm.com>
+
+	* config/aarch64/aarch64-cores.def: Add cortex-a72 and
+	cortex-a72.cortex-a53.
+	* config/aarch64/aarch64-tune.md: Regenerate.
+	* doc/invoke.texi (AArch64 Options/-mtune): Add "cortex-a72".
+
+	2015-02-04  Matthew Wahab  <matthew.wahab@arm.com>
+
+	* config/arm/arm-cores.def: Add cortex-a72 and
+	cortex-a72.cortex-a53.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Likewise.
+	* config/arm/t-aprofile (MULTILIB_MATCHES): Likewise.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/arm-tables.opt: Add entries for "cortex-a72" and
+	"cortex-a72.cortex-a53".
+	* doc/invoke.texi (ARM Options/-mtune): Likewise.
+
+2015-04-13  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r219724, 219746, r220103.
+
+	2014-01-25  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arm/arm-cores.def (cortex-a57): Use the new Cortex-A57
+	pipeline model.
+	config/arm/arm.md: Include the new Cortex-A57 model.
+	(generic_sched): Don't use generic_sched when tuning for
+	Cortex-A57.
+
+	2015-01-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arm/cortex-a57.md: Remove duplicate of file accidentally
+	introduced in revision 219724.
+
+	2015-01-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arm/cortex-a57.md: New.
+	* config/aarch64/aarch64.md: Include it.
+	* config/aarch64/aarch64-cores.def (cortex-a57): Tune for it.
+	* config/aarch64/aarch64-tune.md: Regenerate.
+
+2015-04-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218145, r218146, r219472.
+
+	2015-01-12  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (arm_cortex_a12_tune): Update entries to match
+	Cortex-A17 tuning parameters.
+	* config/arm/arm-cores.def (cortex-a12): Schedule for cortex-a17.
+
+	2014-11-28  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.md (generic_sched): Specify cortexa17 in 'no' list.
+	Include cortex-a17.md.
+	* config/arm/arm.c (arm_issue_rate): Specify 2 for cortexa17.
+	* config/arm/arm-cores.def (cortex-a17): New entry.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=cortex-a17.
+	* config/arm/cortex-a17.md: New file.
+	* config/arm/cortex-a17-neon.md: New file.
+	* config/arm/driver-arm.c (arm_cpu_table): Add entry for cortex-a17.
+	* config/arm/t-aprofile: Add cortex-a17 entries to MULTILIB_MATCHES.
+
+	2014-11-28  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm-cores.def (cortex-a17.cortex-a7): New entry.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Add mcpu=cortex-a17.cortex-a7.
+	* config/arm/t-aprofile: Add cortex-a17.cortex-a7 entry to
+	MULTILIB_MATCHES.
+
+2015-04-09  Yvan Roux  <yvan.roux@linaro.org>
+
+	Fix partial backport done at r221911.
+	* gcc/config/aarch64/aarch64.c: Fix cost tables for APM XGene-1
+
+2015-04-09  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+	Backport from trunk r219745.
+	2015-01-16  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+            Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
+
+	PR target/64263
+	* config/aarch64/aarch64.md (*movsi_aarch64): Don't split if the
+	destination is not a GP reg.
+	(*movdi_aarch64): Likewise.
+
+2015-04-09  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+	Backport from trunk r219578.
+	2015-01-14  Joey Ye  <joey.ye@arm.com>
+
+	* config/arm/arm.c (arm_compute_save_reg_mask):
+	Do not save lr in case of tail call.
+	* config/arm/thumb2.md (*thumb2_pop_single): New pattern.
+
+2015-04-09  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+	Backport from trunk r219544.
+	2015-01-13  Renlin Li  <renlin.li@arm.com>
+
+	* config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO): Return 2.
+	(CTZ_DEFINED_VALUE_AT_ZERO): Ditto.
+
+2015-04-08  Charles Baylis  <charles.baylis@linaro.org>
+
+	Backport from trunk r215567, r216672.
+	2014-10-24  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/aarch64/arm_neon.h (__LD2_LANE_FUNC): Rewrite using builtins,
+	update uses to use new macro arguments.
+	(__LD3_LANE_FUNC): Likewise.
+	(__LD4_LANE_FUNC): Likewise.
+
+	2014-10-24  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_types_loadstruct_lane_qualifiers): Define.
+	* config/aarch64/aarch64-simd-builtins.def (ld2_lane, ld3_lane,
+	ld4_lane): New builtins.
+	* config/aarch64/aarch64-simd.md (aarch64_vec_load_lanesoi_lane<mode>):
+	New pattern.
+	(aarch64_vec_load_lanesci_lane<mode>): Likewise.
+	(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
+	(aarch64_ld2_lane<mode>): New expand.
+	(aarch64_ld3_lane<mode>): Likewise.
+	(aarch64_ld4_lane<mode>): Likewise.
+	* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
+	UNSPEC_LD2_LANE, UNSPEC_LD3_LANE, UNSPEC_LD4_LANE.
+
+2015-04-07  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r219656, r219657, r219659, r219661, r219679.
+	2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64-cores.def (xgene1): Update/add the
+	xgene1 (APM XGene-1) core definition.
+	* gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1
+	* config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1
+	* doc/invoke.texi: Document -mcpu=xgene1.
+
+	2015-01-15  Philipp Tomsich  <ptomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.md: Include xgene1.md.
+	* config/aarch64/xgene1.md: New file.
+
+	2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/arm/arm.md (generic_sched): Specify xgene1 in 'no' list.
+	Include xgene1.md.
+	* config/arm/arm.c (arm_issue_rate): Specify 4 for xgene1.
+	* config/arm/arm-cores.def (xgene1): New entry.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Specify mcpu=xgene1.
+
+	2015-01-15  Richard Earnshaw  <rearnsha@arm.com>
+
+	* arm.c (arm_xgene_tune): Add default initializer for instruction
+	fusion.
+
+2015-04-07  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217062, r217646, r218658.
+	2014-12-12  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	PR rtl-optimization/63917
+	* ifcvt.c (cc_in_cond): New function.
+	(end_ifcvt_sequence): Make sure new generated insns do not clobber CC.
+	(noce_process_if_block, check_cond_move_block): Check CC references.
+
+	2014-11-17  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	* ifcvt.c (HAVE_cbranchcc4): Define.
+	(noce_emit_cmove, noce_get_alt_condition, noce_get_condition):
+	Use HAVE_cbranchcc4.
+
+	2014-11-04  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	Revert:
+		2014-11-03  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+	* ifcvt.c (noce_emit_cmove, noce_get_alt_condition, noce_get_condition):
+	Allow CC mode if HAVE_cbranchcc4.
+
+2015-04-02  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Fix testcase backported from trunk
+
+	* gcc/testsuite/gcc.dg/pr64935-1.c: Ignore warnings that can't be
+	disabled with not-yet-existing -Wno-shift-count-overflow.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218958, r218960, r218961.
+	2014-12-19  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.c (<LOGICAL:optab>_one_cmpl<mode>3):
+	Reparameterize to...
+	(<NLOGICAL:optab>_one_cmpl<mode>3): with extra SIMD-register variant.
+	(xor_one_cmpl<mode>3): New define_insn_and_split.
+
+	* config/aarch64/iterators.md (NLOGICAL): New define_code_iterator.
+
+	2014-12-19  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.md (<optab><mode>3, one_cmpl<mode>2):
+	Add SIMD-register variant.
+	* config/aarch64/iterators.md (Vbtype): Add value for SI.
+
+	2014-12-19  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.md (subdi3, adddi3_aarch64): Don't penalize
+	SIMD reg variant.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218897.
+	2014-12-19  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* doc/invoke.texi (ARM options): Remove mention of Advanced RISC
+	Machines.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218895.
+	2014-12-19  Xingxing Pan <xxingpan@marvell.com>
+
+	* config/arm/cortex-a9-neon.md (cortex_a9_neon_vmov): Change
+	reservation to cortex_a9_neon_dp.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218530.
+	2014-12-09  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.md (absdi2): Remove scratch operand by
+	earlyclobbering result operand.
+
+	* config/aarch64/aarch64-builtins.c (aarch64_types_unop_qualifiers):
+	Remove final qualifier_internal.
+	(aarch64_fold_builtin): Stop folding abs builtins, except on floats.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218526.
+	2014-12-09  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* gcc/config/aarch64/aarch64-protos.h (tune-params): Add reasociation
+	tuning parameters.
+	* gcc/config/aarch64/aarch64.c (TARGET_SCHED_REASSOCIATION_WIDTH):
+	Define.
+	(aarch64_reassociation_width): New function.
+	(generic_tunings): Add reassociation tuning parameters.
+	(cortexa53_tunings): Likewise.
+	(cortexa57_tunings): Likewise.
+	(thunderx_tunings): Likewise.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218866.
+	2014-12-18  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* gcc/config/aarch64/aarch64.c (TARGET_MIN_DIVISIONS_FOR_RECIP_MUL):
+	Define.
+	(aarch64_min_divisions_for_recip_mul): New function.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218867, r218868.
+	2014-12-18  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd.md (aarch64_lshr_simddi): Handle shift
+	by 64 by moving const0_rtx.
+	(aarch64_ushr_simddi): Delete.
+
+	* config/aarch64/aarch64.md (enum unspec): Delete UNSPEC_USHR64.
+
+	2014-12-18  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.md (enum "unspec"): Remove UNSPEC_SSHR64.
+
+	* config/aarch64/aarch64-simd.md (aarch64_ashr_simddi): Change shift
+	amount to 63 if was 64.
+	(aarch64_sshr_simddi): Remove.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218855.
+	2014-12-18  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/62178
+	* tree-ssa-loop-ivopts.c (cheaper_cost_with_cand): New function.
+	(iv_ca_replace): New function.
+	(try_improve_iv_set): New parameter try_replace_p.
+	Break local optimal fixed-point by calling iv_ca_replace.
+	(find_optimal_iv_set_1): Pass new argument to try_improve_iv_set.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218829.
+	2014-12-17  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.md (generic_sched): Delete it.
+
+2015-03-27  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218432, r218635, r219470.
+
+	2015-01-12  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm-protos.h (tune_params): Add fuseable_ops field.
+	* config/arm/arm.c (arm_macro_fusion_p): New function.
+	(arm_macro_fusion_pair_p): Likewise.
+	(TARGET_SCHED_MACRO_FUSION_P): Define.
+	(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
+	(ARM_FUSE_NOTHING): Likewise.
+	(ARM_FUSE_MOVW_MOVT): Likewise.
+	(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,
+	arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,
+	arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,
+	arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune,
+	arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune
+	arm_cortex_a5_tune): Specify fuseable_ops value.
+
+	2014-12-11  Renlin Li  <renlin.li@arm.com>
+
+	* config/aarch64/aarch64-cores.def: Change all AARCH64_FL_FPSIMD to
+	AARCH64_FL_FOR_ARCH8.
+	* config/aarch64/aarch64.c (all_cores): Use FLAGS from
+	aarch64-cores.def file only.
+
+	2014-12-05  Renlin Li  <renlin.li@arm.com>
+
+	* config/aarch64/aarch64-opts.h (AARCH64_CORE): Rename IDENT to SCHED.
+	* config/aarch64/aarch64.h (AARCH64_CORE): Likewise.
+	* config/aarch64/aarch64.c (AARCH64_CORE): Rename X to IDENT,
+	IDENT to SCHED.
+
+2015-03-24  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Backport from trunk r210736, r210737, r210744, r210746, r210747,
+	r210845, r213708, r213709, r216620, r216621, r216622, r216623,
+	r216624, r219787, r219789, r219893, r220316, r220808.
+
+	2015-02-19  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+        * haifa-sched.c (enum rfs_decision, rfs_str): Remove RFS_DEBUG.
+        (rank_for_schedule_debug): Update.
+        (ready_sort): Make static.  Move sorting logic to ...
+        (ready_sort_debug, ready_sort_real): New static functions.
+        (schedule_block): Sort both debug insns and real insns in preparation
+        for ready list trimming.  Improve debug output.
+        * sched-int.h (ready_sort): Remove global declaration.
+
+	2015-02-01  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* haifa-sched.c (INSN_RFS_DEBUG_ORIG_ORDER): New access macro.
+	(rank_for_schedule_debug): Split from ...
+	(rank_for_schedule): ... this.
+	(ready_sort): Sort DEBUG_INSNs separately from normal INSNs.
+	* sched-int.h (struct _haifa_insn_data): New field rfs_debug_orig_order.
+
+	2015-01-20  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* config/arm/arm-protos.h (enum arm_sched_autopref): New constants.
+	(struct tune_params): Use the enum.
+	* arm.c (arm_*_tune): Update.
+	(arm_option_override): Update.
+
+	2015-01-17  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* config/arm/arm-protos.h (struct tune_params): New field
+	sched_autopref_queue_depth.
+	* config/arm/arm.c (sched-int.h): Include header.
+	(arm_first_cycle_multipass_dfa_lookahead_guard,)
+	(TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD): Define hook.
+	(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,)
+	(arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,)
+	(arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,)
+	(arm_cortex_a53_tune, arm_cortex_a57_tune, arm_xgene1_tune,)
+	(arm_cortex_a5_tune, arm_cortex_a9_tune, arm_cortex_a12_tune,)
+	(arm_v7m_tune, arm_cortex_m7_tune, arm_v6m_tune, arm_fa726te_tune):
+	Specify sched_autopref_queue_depth value.  Enabled for A15 and A57.
+	* config/arm/t-arm (arm.o): Update.
+	* haifa-sched.c (update_insn_after_change): Update.
+	(rank_for_schedule): Use auto-prefetcher model, if requested.
+	(autopref_multipass_init): New static function.
+	(autopref_rank_for_schedule): New rank_for_schedule heuristic.
+	(autopref_multipass_dfa_lookahead_guard_started_dump_p): New static
+	variable for debug dumps.
+	(autopref_multipass_dfa_lookahead_guard_1): New static helper function.
+	(autopref_multipass_dfa_lookahead_guard): New global function that
+	implements TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD hook.
+	(init_h_i_d): Update.
+	* params.def (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH): New tuning knob.
+	* sched-int.h (enum autopref_multipass_data_status): New const enum.
+	(autopref_multipass_data_): Structure for auto-prefetcher data.
+	(autopref_multipass_data_def, autopref_multipass_data_t): New typedefs.
+	(struct _haifa_insn_data:autopref_multipass_data): New field.
+	(INSN_AUTOPREF_MULTIPASS_DATA): New access macro.
+	(autopref_multipass_dfa_lookahead_guard): Declare.
+
+	2015-01-17  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* config/aarch64/aarch64.c
+	(aarch64_sched_first_cycle_multipass_dfa_lookahead): Implement hook.
+	(TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD): Define.
+	* config/arm/arm.c
+	(arm_first_cycle_multipass_dfa_lookahead): Implement hook.
+	(TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD): Define.
+
+	2014-10-24  Maxim Kuvyrkov  <maxim.kuvyrkov@gmail.com>
+
+        * rtlanal.c (get_base_term): Handle SCRATCH.
+
+	2014-10-24  Maxim Kuvyrkov  <maxim.kuvyrkov@gmail.com>
+
+        * haifa-sched.c (sched_init): Disable max_issue when scheduling for
+        register pressure.
+
+	2014-10-24  Maxim Kuvyrkov  <maxim.kuvyrkov@gmail.com>
+
+        * haifa-sched.c (cached_first_cycle_multipass_dfa_lookahead,)
+        (cached_issue_rate): Remove.  Use dfa_lookahead and issue_rate instead.
+        (max_issue, choose_ready, sched_init): Update.
+
+	2014-10-24  Maxim Kuvyrkov  <maxim.kuvyrkov@gmail.com>
+
+	* sched-int.h (struct _haifa_insn_data:last_rfs_win): New field.
+	* haifa-sched.c (INSN_LAST_RFS_WIN): New access macro.
+	(rfs_result): Set INSN_LAST_RFS_WIN.  Update signature.
+	(rank_for_schedule): Update calls to rfs_result to pass new parameters.
+	(print_rank_for_schedule_stats): Print out elements of ready list that
+	ended up on their respective places due to each of the sorting
+	heuristics.
+	(ready_sort): Update.
+	(debug_ready_list_1): Improve printout for SCHED_PRESSURE_MODEL.
+	(schedule_block): Update.
+
+	2014-10-24  Maxim Kuvyrkov  <maxim.kuvyrkov@gmail.com>
+
+	* haifa-sched.c (sched_class_regs_num, call_used_regs_num): New static
+	arrays.  Use sched_class_regs_num instead of ira_class_hard_regs_num.
+	(print_curr_reg_pressure, setup_insn_reg_pressure_info,)
+	(model_update_pressure, model_spill_cost): Use sched_class_regs_num.
+	(model_start_schedule): Update.
+	(sched_pressure_start_bb): New static function.  Calculate
+	sched_class_regs_num.
+	(schedule_block): Use it.
+	(alloc_global_sched_pressure_data): Calculate call_used_regs_num.
+
+	2014-08-07  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* haifa-sched.c (SCHED_SORT): Delete.  Macro used exactly once.
+	(enum rfs_decition:RFS_*): New constants wrapped in an enum.
+	(rfs_str): String corresponding to RFS_* constants.
+	(rank_for_schedule_stats_t): New typedef.
+	(rank_for_schedule_stats): New static variable.
+	(rfs_result): New static function.
+	(rank_for_schedule): Track statistics for deciding heuristics.
+	(rank_for_schedule_stats_diff, print_rank_for_schedule_stats): New
+	static functions.
+	(ready_sort): Use them for debug printouts.
+	(schedule_block): Init statistics state.  Print statistics on
+	rank_for_schedule decisions.
+
+	2014-08-07  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	* haifa-sched.c (rank_for_schedule): Fix INSN_TICK-based heuristics.
+
+	2014-05-23  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Fix bootstrap error on ia64
+	* config/ia64/ia64.c (ia64_first_cycle_multipass_dfa_lookahead_guard):
+	Return default value.
+
+	2014-05-22  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Cleanup and improve multipass_dfa_lookahead_guard
+        * config/i386/i386.c (core2i7_first_cycle_multipass_filter_ready_try,)
+        (core2i7_first_cycle_multipass_begin,)
+        (core2i7_first_cycle_multipass_issue,)
+        (core2i7_first_cycle_multipass_backtrack): Update signature.
+        * config/ia64/ia64.c
+        (ia64_first_cycle_multipass_dfa_lookahead_guard_spec): Remove.
+        (ia64_first_cycle_multipass_dfa_lookahead_guard): Update signature.
+        (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC): Remove
+        hook definition.
+        (ia64_first_cycle_multipass_dfa_lookahead_guard): Merge logic from
+        ia64_first_cycle_multipass_dfa_lookahead_guard_spec.  Update return
+        values.
+        * config/rs6000/rs6000.c (rs6000_use_sched_lookahead_guard): Update
+        return values.
+        * doc/tm.texi: Regenerate.
+        * doc/tm.texi.in
+        (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC): Remove.
+        * haifa-sched.c (ready_try): Make signed to allow negative values.
+        (rebug_ready_list_1): Update.
+        (choose_ready): Simplify.
+        (sched_extend_ready_list): Update.
+
+	2014-05-22  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Remove IA64 speculation tweaking flags
+        * config/ia64/ia64.c (ia64_set_sched_flags): Delete handling of
+        speculation tuning flags.
+        (msched-prefer-non-data-spec-insns,)
+        (msched-prefer-non-control-spec-insns): Obsolete options.
+        * haifa-sched.c (choose_ready): Remove handling of
+        PREFER_NON_CONTROL_SPEC and PREFER_NON_DATA_SPEC.
+        * sched-int.h (enum SPEC_SCHED_FLAGS): Remove PREFER_NON_CONTROL_SPEC
+        and PREFER_NON_DATA_SPEC.
+        * sel-sched.c (process_spec_exprs): Remove handling of
+        PREFER_NON_CONTROL_SPEC and PREFER_NON_DATA_SPEC.
+
+	2014-05-22  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Improve scheduling debug output
+	* haifa-sched.c (debug_ready_list): Remove unnecessary prototype.
+	(advance_one_cycle): Update.
+	(schedule_insn, queue_to_ready): Add debug printouts.
+	(debug_ready_list_1): New static function.
+	(debug_ready_list): Update.
+	(max_issue): Add debug printouts.
+	(dump_insn_stream): New static function.
+	(schedule_block): Use it.  Also better indent printouts.
+
+	2014-05-22  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Fix sched_insn debug counter
+	* haifa-sched.c (schedule_insn): Update.
+	(struct haifa_saved_data): Add nonscheduled_insns_begin.
+	(save_backtrack_point, restore_backtrack_point): Update.
+	(first_nonscheduled_insn): New static function.
+	(queue_to_ready, choose_ready): Use it.
+	(schedule_block): Init nonscheduled_insns_begin.
+	(sched_emit_insn): Update.
+
+2015-03-18  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218007, r218010, r218012, r218013, r218014,
+	r218525.
+
+	2014-12-09  Andrew Pinski  apinski@cavium.com
+            Kyrylo Tkachov  kyrylo.tkachov@arm.com
+
+	* config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define.
+	(thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops.
+	(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH.
+
+	2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (AARCH64_FUSE_ADRP_LDR): Define.
+	(cortexa53_tunings): Specify AARCH64_FUSE_ADRP_LDR in fuseable_ops.
+	(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_LDR.
+
+	2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (AARCH64_FUSE_MOVK_MOVK): Define.
+	(cortexa53_tunings): Specify AARCH64_FUSE_MOVK_MOVK in fuseable_ops.
+	(cortexa57_tunings): Likewise.
+	(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_MOVK_MOVK.
+
+	2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* sched-deps.c (sched_macro_fuse_insns): Do not check modified_in_p
+	in the not conditional jump case.
+	* doc/tm.texi (TARGET_SCHED_MACRO_FUSION_PAIR_P): Update description.
+	* target.def (TARGET_SCHED_MACRO_FUSION_PAIR_P): Update description.
+
+	2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c: Include tm-constrs.h
+	(AARCH64_FUSE_ADRP_ADD): Define.
+	(cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
+	(cortexa53_tunings): Likewise.
+	(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.
+
+	2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64-protos.h (struct tune_params): Add
+	fuseable_ops field.
+	* config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
+	(cortexa53_tunings): Likewise.
+	(cortexa57_tunings): Likewise.
+	(thunderx_tunings): Likewise.
+	(aarch64_macro_fusion_p): New function.
+	(aarch_macro_fusion_pair_p): Likewise.
+	(TARGET_SCHED_MACRO_FUSION_P): Define.
+	(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
+	(AARCH64_FUSE_MOV_MOVK): Likewise.
+	(AARCH64_FUSE_NOTHING): Likewise.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+	* LINARO-VERSION: Update.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218503.
+	2014-12-08  Sandra Loosemore  <sandra@codesourcery.com>
+
+	* simplify-rtx.c (simplify_relational_operation_1): Handle
+	simplification identities for BICS patterns.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r220751.
+	2015-02-17  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* haifa-sched.c (recompute_todo_spec): Treat SCHED_GROUP_P
+	as forcing a HARD_DEP between instructions, thereby
+	disallowing rewriting to break dependencies.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217725.
+	2014-11-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/cortex-a15-neon.md (cortex_a15_vfp_to_from_gp):
+	Split into...
+	(cortex_a15_gp_to_vfp): ...This.
+	(cortex_a15_fp_to_gp): ...And this.
+	Define and comment bypass from vfp operations to fp->gp moves.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217780.
+	2014-11-19  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	PR target/61915
+	* config/aarch64/aarch64.c (generic_regmove_cost): Increase FP move
+	cost.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217938.
+	2014-11-21  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/iterators.md (VS): New mode iterator.
+	(vsi2qi): New mode attribute.
+	(VSI2QI): Likewise.
+	* config/aarch64/aarch64-simd-builtins.def: New entry for ctz.
+	* config/aarch64/aarch64-simd.md (ctz<mode>2): New pattern for ctz.
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_builtin_vectorized_function): Support BUILT_IN_CTZ.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217852.
+	2014-11-20  Tejas Belagod  <tejas.belagod@arm.com>
+
+	* config/aarch64/aarch64-protos.h (aarch64_classify_symbol):
+	Fixup prototype.
+	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate,
+	aarch64_cannot_force_const_mem, aarch64_classify_address,
+	aarch64_classify_symbolic_expression): Fixup call to
+	aarch64_classify_symbol.
+	(aarch64_classify_symbol): Add range-checking for
+	symbol + offset addressing for tiny and small models.
+
+2015-03-06  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r217707.
+	2014-11-18  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* config/arm/neon-testgen.ml (emit_prologue): Handle new
+	compile_test_optim argument.
+	(emit_automatics): Rename to emit_variables. Support variable
+	indentation of its output.
+	(compile_test_optim): New function.
+	(test_intrinsic): Call compile_test_optim.
+	* config/arm/neon.ml (features): Add Compiler_optim.
+	(ops): Add Compiler_optim feature to Vbic and Vorn.
+	(type_in_crypto_only): Replace 'or' by '||'.
+	(reinterp): Likewise.
+	(reinterpq): Likewise.
+
+2015-03-05  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212011, r214942, r214957, r215012, r215016, r218115,
+	r218733, r218746, r220491.
+	2015-02-06  Sebastian Pop  <s.pop@samsung.com>
+		    Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/64878
+	* tree-ssa-threadedge.c: Include tree-ssa-loop.h.
+	(fsm_find_control_statement_thread_paths): Add parameter seen_loop_phi.
+	Stop recursion at loop phi nodes after having visited a loop phi node.
+
+	2014-12-15  Richard Biener  <rguenther@suse.de>
+
+	PR middle-end/64246
+	* cfgloop.c (mark_loop_for_removal): Make safe against multiple
+	invocations on the same loop.
+
+	2014-12-15  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64284
+	* tree-ssa-threadupdate.c (duplicate_seme_region): Mark
+	the loop for removal if we copied the loop header.
+
+	2014-11-27  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64083
+	* tree-ssa-threadupdate.c (thread_through_all_blocks): Do not
+	forcibly mark loop for removal the wrong way.
+
+	2014-09-08  Richard Biener  <rguenther@suse.de>
+
+	PR ipa/63196
+	* tree-inline.c (copy_loops): The source loop header should
+	always be non-NULL.
+	(tree_function_versioning): If loops need fixup after removing
+	unreachable blocks fix them.
+	* omp-low.c (simd_clone_adjust): Do not add incr block to
+	loop under construction.
+
+	2014-09-08  Richard Biener  <rguenther@suse.de>
+
+	PR bootstrap/63204
+	* cfgloop.c (mark_loop_for_removal): Track former header
+	unconditionally.
+	* cfgloop.h (struct loop): Add former_header member unconditionally.
+	* loop-init.c (fix_loop_structure): Enable bogus loop removal
+	diagnostic unconditionally.
+
+	2014-09-05  Richard Biener  <rguenther@suse.de>
+
+	* cfgloop.c (mark_loop_for_removal): Record former header
+	when ENABLE_CHECKING.
+	* cfgloop.h (strut loop): Add former_header member when
+	ENABLE_CHECKING.
+	* loop-init.c (fix_loop_structure): Sanity check loops
+	marked for removal if they re-appeared.
+
+	2014-09-05  Richard Biener  <rguenther@suse.de>
+
+	* cfgloop.c (mark_loop_for_removal): New function.
+	* cfgloop.h (mark_loop_for_removal): Declare.
+	* cfghooks.c (delete_basic_block): Use mark_loop_for_removal.
+	(merge_blocks): Likewise.
+	(duplicate_block): Likewise.
+	* except.c (sjlj_emit_dispatch_table): Likewise.
+	* tree-eh.c (cleanup_empty_eh_merge_phis): Likewise.
+	* tree-ssa-threadupdate.c (ssa_redirect_edges): Likewise.
+	(thread_through_loop_header): Likewise.
+
+	2014-06-26  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/61607
+	* tree-ssa-threadupdate.c (ssa_redirect_edges): Cancel the
+	loop if we redirected its latch edge.
+	(thread_block_1): Do not cancel loops prematurely.
+
+2015-03-05  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r220860.
+	2015-02-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.md (*aarch64_lshr_sisd_or_int_<mode>3):
+	Mark operand 0 as earlyclobber in 2nd alternative.
+	(1st define_split below *aarch64_lshr_sisd_or_int_<mode>3):
+	Write negated shift amount into QI lowpart operand 0 and use it
+	in the shift step.
+	(2nd define_split below *aarch64_lshr_sisd_or_int_<mode>3): Likewise.
+
+2015-03-04  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	Backport from trunk r215722.
+	2014-09-30  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def (sqdmull_laneq): Expand
+	iterator.
+	* config/aarch64/aarch64-simd.md
+	(aarch64_sqdmull_laneq<mode>): Expand iterator.
+	* config/aarch64/arm_neon.h (vqdmullh_laneq_s16): New.
+	(vqdmulls_lane_s32): Fix return type.
+	(vqdmulls_laneq_s32): New.
+
+2015-03-04  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	Backport from trunk r215612.
+	2014-09-25  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-protos.h (aarch64_simd_const_bounds): Delete.
+	* config/aarch64/aarch64-simd.md (aarch64_<sur>q<r>shl<mode>): Use
+	new predicates.
+	(aarch64_<sur>shll2_n<mode>): Likewise.
+	(aarch64_<sur>shr_n<mode>): Likewise.
+	(aarch64_<sur>sra_n<mode>: Likewise.
+	(aarch64_<sur>s<lr>i_n<mode>): Likewise.
+	(aarch64_<sur>qshl<u>_n<mode>): Likewise.
+	* config/aarch64/aarch64.c (aarch64_simd_const_bounds): Delete.
+	* config/aarch64/iterators.md (ve_mode): New.
+	(offsetlr): Remap to infix text for use in new predicates.
+	* config/aarch64/predicates.md (aarch64_simd_shift_imm_qi): New.
+	(aarch64_simd_shift_imm_hi): Likewise.
+	(aarch64_simd_shift_imm_si): Likewise.
+	(aarch64_simd_shift_imm_di): Likewise.
+	(aarch64_simd_shift_imm_offset_qi): Likewise.
+	(aarch64_simd_shift_imm_offset_hi): Likewise.
+	(aarch64_simd_shift_imm_offset_si): Likewise.
+	(aarch64_simd_shift_imm_offset_di): Likewise.
+	(aarch64_simd_shift_imm_bitsize_qi): Likewise.
+	(aarch64_simd_shift_imm_bitsize_hi): Likewise.
+	(aarch64_simd_shift_imm_bitsize_si): Likewise.
+	(aarch64_simd_shift_imm_bitsize_di): Likewise.
+
+2015-02-15  Michael Collison  <michael.collison@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+        * LINARO-VERSION: Update.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217175, r217185, r217186.
+	2014-11-06  Hale Wang  <hale.wang@arm.com>
+
+	* config/arm/arm-cores.def: Add support for
+	-mcpu=cortex-m0.small-multiply,cortex-m0plus.small-multiply,
+	cortex-m1.small-multiply.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/arm.c: Update the rtx-costs for MUL.
+	* config/arm/bpabi.h: Handle
+	-mcpu=cortex-m0.small-multiply,cortex-m0plus.small-multiply,
+	cortex-m1.small-multiply.
+	* doc/invoke.texi: Document
+	-mcpu=cortex-m0.small-multiply,cortex-m0plus.small-multiply,
+	cortex-m1.small-multiply.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217091.
+	2014-11-04  Jiong Wang  <jiong.wang@arm.com>
+	2014-11-04  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	PR target/63293
+	* config/aarch64/aarch64.c (aarch64_expand_epiloue): Add barriers before
+	stack adjustment.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217118.
+	2014-11-05  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* simplify-rtx.c (simplify_binary_operation_1): Div check added.
+	* rtl.h (SUBREG_P): New macro added.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217215.
+	2014-11-07  Jiong Wang  <jiong.wang@arm.com>
+	2014-11-07  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/63676
+	* gimple-fold.c (fold_gimple_assign): Do not fold node when
+	TREE_CLOBBER_P be true.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r219583.
+	2015-01-14  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/64460
+	* config/arm/arm.md (*<arith_shift_insn>_multsi): Set 'shift' to 2.
+	(*<arith_shift_insn>_shiftsi): Set 'shift' attr to 3.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217430.
+	2014-11-12  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm.c (*<arith_shift_insn>_shiftsi): Fix typo.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217431.
+	2014-11-12  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.h (CALL_USED_REGISTERS): Mark LR as
+	caller-save.
+	(EPILOGUE_USES): Guard the check by epilogue_completed.
+	* config/aarch64/aarch64.c (aarch64_layout_frame): Explictly check for
+	LR.
+	(aarch64_can_eliminate): Check LR_REGNUM liveness.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r219718.
+	* expmed.c (store_bit_field_using_insv): Improve warning message.
+	Use %wu instead of HOST_WIDE_INT_PRINT_UNSIGNED.
+
+	2015-01-15  Jiong Wang  <jiong.wang@arm.com>
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r219717.
+	2015-01-15  Jiong Wang  <jiong.wang@arm.com>
+
+	PR rtl-optimization/64011
+	* expmed.c (store_bit_field_using_insv): Warn and truncate bitsize when
+	there is partial overflow.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217331.
+	2014-11-11  Bin Cheng  <bin.cheng@arm.com>
+
+	* sched-deps.c (sched_analyze_1): Check pending list if it is not
+	less than MAX_PENDING_LIST_LENGTH.
+	(sched_analyze_2, sched_analyze_insn, deps_analyze_insn): Ditto.
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216779.
+	2014-10-28  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* expr.c (expand_expr_real_2): Remove code handling VEC_LSHIFT_EXPR.
+	* fold-const.c (const_binop): Likewise.
+	* cfgexpand.c (expand_debug_expr): Likewise.
+	* tree-inline.c (estimate_operator_cost): Likewise.
+	* tree-vect-generic.c (expand_vector_operations_1): Likewise.
+	* optabs.c (optab_for_tree_code): Likewise.
+	(expand_vec_shift_expr): Likewise, update comment.
+	* tree.def: Delete VEC_LSHIFT_EXPR, remove comment.
+	* optabs.h (expand_vec_shift_expr): Remove comment re. VEC_LSHIFT_EXPR.
+	* optabs.def: Remove vec_shl_optab.
+	* doc/md.texi: Remove references to vec_shr_m.
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216742.
+	2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define again.
+	* config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
+	Restore, enable for bigendian, update to use __builtin..._scal...
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216741.
+	2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def (reduc_smax_, reduc_smin_,
+	reduc_umax_, reduc_umin_, reduc_smax_nan_, reduc_smin_nan_): Remove.
+	(reduc_smax_scal_, reduc_smin_scal_, reduc_umax_scal_,
+	reduc_umin_scal_, reduc_smax_nan_scal_, reduc_smin_nan_scal_): New.
+
+	* config/aarch64/aarch64-simd.md
+	(reduc_<maxmin_uns>_<mode>): Rename VDQV_S variant to...
+	(reduc_<maxmin_uns>_internal<mode>): ...this.
+	(reduc_<maxmin_uns>_<mode>): New (VDQ_BHSI).
+	(reduc_<maxmin_uns>_scal_<mode>): New (*2).
+
+	(reduc_<maxmin_uns>_v2si): Combine with below, renaming...
+	(reduc_<maxmin_uns>_<mode>): Combine V2F with above, renaming...
+	(reduc_<maxmin_uns>_internal_<mode>): ...to this (VDQF).
+
+	* config/aarch64/arm_neon.h (vmaxv_f32, vmaxv_s8, vmaxv_s16,
+	vmaxv_s32, vmaxv_u8, vmaxv_u16, vmaxv_u32, vmaxvq_f32, vmaxvq_f64,
+	vmaxvq_s8, vmaxvq_s16, vmaxvq_s32, vmaxvq_u8, vmaxvq_u16, vmaxvq_u32,
+	vmaxnmv_f32, vmaxnmvq_f32, vmaxnmvq_f64, vminv_f32, vminv_s8,
+	vminv_s16, vminv_s32, vminv_u8, vminv_u16, vminv_u32, vminvq_f32,
+	vminvq_f64, vminvq_s8, vminvq_s16, vminvq_s32, vminvq_u8, vminvq_u16,
+	vminvq_u32, vminnmv_f32, vminnmvq_f32, vminnmvq_f64): Update to use
+	__builtin_aarch64_reduc_..._scal; remove vget_lane wrapper.
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216738.
+	2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def
+	(reduc_splus_<mode>/VDQF, reduc_uplus_<mode>/VDQF, reduc_splus_v4sf):
+	Remove.
+	(reduc_plus_scal_<mode>, reduc_plus_scal_v4sf): New.
+
+	* config/aarch64/aarch64-simd.md (reduc_<sur>plus_mode): Remove.
+	(reduc_splus_<mode>, reduc_uplus_<mode>, reduc_plus_scal_<mode>): New.
+
+	(reduc_<sur>plus_mode): Change SUADDV -> UNSPEC_ADDV, rename to...
+	(aarch64_reduc_plus_internal<mode>): ...this.
+
+	(reduc_<sur>plus_v2si): Change SUADDV -> UNSPEC_ADDV, rename to...
+	(aarch64_reduc_plus_internalv2si): ...this.
+
+	(reduc_splus_<mode>/V2F): Rename to...
+	(aarch64_reduc_plus_internal<mode>): ...this.
+
+	* config/aarch64/iterators.md
+	(UNSPEC_SADDV, UNSPEC_UADDV, SUADDV): Remove.
+	(UNSPEC_ADDV): New.
+	(sur): Remove elements for UNSPEC_SADDV and UNSPEC_UADDV.
+
+	* config/aarch64/arm_neon.h (vaddv_s8, vaddv_s16, vaddv_s32, vaddv_u8,
+	vaddv_u16, vaddv_u32, vaddvq_s8, vaddvq_s16, vaddvq_s32, vaddvq_s64,
+	vaddvq_u8, vaddvq_u16, vaddvq_u32, vaddvq_u64, vaddv_f32, vaddvq_f32,
+	vaddvq_f64): Change __builtin_aarch64_reduc_[us]plus_... to
+	__builtin_aarch64_reduc_plus_scal, remove vget_lane wrapper.
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216737.
+	2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>
+
+	PR tree-optimization/61114
+	* doc/md.texi (Standard Names): Add reduc_(plus,[us](min|max))|scal
+	optabs, and note in reduc_[us](plus|min|max) to prefer the former.
+
+	* expr.c (expand_expr_real_2): Use reduc_..._scal if available, fall
+	back to old reduc_...  BIT_FIELD_REF only if not.
+
+	* optabs.c (optab_for_tree_code): for REDUC_(MAX,MIN,PLUS)_EXPR,
+	return the reduce-to-scalar (reduc_..._scal) optab.
+	(scalar_reduc_to_vector): New.
+
+	* optabs.def (reduc_smax_scal_optab, reduc_smin_scal_optab,
+	reduc_plus_scal_optab, reduc_umax_scal_optab, reduc_umin_scal_optab):
+	New.
+
+	* optabs.h (scalar_reduc_to_vector): Declare.
+
+	* tree-vect-loop.c (vectorizable_reduction): Look for optabs reducing
+	to either scalar or vector.
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216736.
+	2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>
+
+	PR tree-optimization/61114
+	* expr.c (expand_expr_real_2): For REDUC_{MIN,MAX,PLUS}_EXPR, add
+	extract_bit_field around optab result.
+
+	* fold-const.c (fold_unary_loc): For REDUC_{MIN,MAX,PLUS}_EXPR, produce
+	scalar not vector.
+
+	* tree-cfg.c (verify_gimple_assign_unary): Check result vs operand type
+	for REDUC_{MIN,MAX,PLUS}_EXPR.
+
+	* tree-vect-loop.c (vect_analyze_loop): Update comment.
+	(vect_create_epilog_for_reduction): For direct vector reduction, use
+	result of tree code directly without extract_bit_field.
+
+	* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Update
+	comment.
+
+2015-02-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216734.
+	2014-10-27  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Comment out.
+	* config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
+	Remove using preprocessor directis.
+
+2015-02-09  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217173, r217174, r217687.
+	2014-11-17  Terry Guo  <terry.guo@arm.com>
+
+	* config/arm/arm.c (arm_issue_rate): Return 2 for cortex-m7.
+	* config/arm/arm.md (generic_sched): Exclude cortex-m7.
+	(generic_vfp): Likewise.
+	* config/arm/cortex-m7.md: Pipeline description for cortex-m7.
+
+	2014-10-06  Hale Wang  <Hale.Wang@arm.com>
+
+	* config/arm/arm.c: Add cortex-m7 tune.
+	* config/arm/arm-cores.def: Use cortex-m7 tune.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+	* LINARO-VERSION: Update.
+
+2015-01-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Fix Linaro PR #902
+
+	Partial Backport from trunk r211798.
+	2014-06-18  Radovan Obradovic  <robradovic@mips.com>
+		    Tom de Vries  <tom@codesourcery.com>
+
+	* config/arm/arm.c (arm_emit_call_insn): Add IP and CC clobbers to
+	CALL_INSN_FUNCTION_USAGE.
+
+	Backport from trunk r209800.
+	2014-04-25  Tom de Vries  <tom@codesourcery.com>
+
+	* expr.c (clobber_reg_mode): New function.
+	* expr.h (clobber_reg): New function.
+
+2015-01-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211783.
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/arm.c (neon_vector_mem_operand): Allow register
+	POST_MODIFY for neon loads and stores.
+	(arm_print_operand): Output post-index register for neon loads and
+	stores.
+
+2015-01-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218451.
+	2014-12-06  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Sebastian Pop  <s.pop@samsung.com>
+		    Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/54742
+	* params.def (max-fsm-thread-path-insns, max-fsm-thread-length,
+	max-fsm-thread-paths): New.
+
+	* doc/invoke.texi (max-fsm-thread-path-insns, max-fsm-thread-length,
+	max-fsm-thread-paths): Documented.
+
+	* tree-cfg.c (split_edge_bb_loc): Export.
+	* tree-cfg.h (split_edge_bb_loc): Declared extern.
+
+	* tree-ssa-threadedge.c (simplify_control_stmt_condition): Restore the
+	original value of cond when simplification fails.
+	(fsm_find_thread_path): New.
+	(fsm_find_control_statement_thread_paths): New.
+	(thread_through_normal_block): Call find_control_statement_thread_paths.
+
+	* tree-ssa-threadupdate.c (dump_jump_thread_path): Pretty print
+	EDGE_FSM_THREAD.
+	(verify_seme): New.
+	(duplicate_seme_region): New.
+	(thread_through_all_blocks): Generate code for EDGE_FSM_THREAD edges
+	calling duplicate_seme_region.
+
+	* tree-ssa-threadupdate.h (jump_thread_edge_type): Add EDGE_FSM_THREAD.
+
+2015-01-13  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217394.
+	2014-11-11  Andrew Pinski  <apinski@cavium.com>
+
+	Bug target/61997
+	* config.gcc (aarch64*-*-*): Set target_gtfiles to include
+	aarch64-builtins.c.
+	* config/aarch64/aarch64-builtins.c: Include gt-aarch64-builtins.h
+	at the end of the file.
+
+2015-01-13  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r216267, r216547, r216548, r217072, r217192, r217405,
+	r217406, r217768.
+	2014-11-19  Renlin Li  <renlin.li@arm.com>
+
+	* config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FP_FAST,
+	__ARM_FEATURE_FMA, __ARM_FP, __ARM_FEATURE_NUMERIC_MAXMIN, __ARM_NEON_FP.
+
+	2014-11-12  Tejas Belagod  <tejas.belagod@arm.com>
+
+	* Makefile.in (TEXI_GCC_FILES): Remove arm-acle-intrinsics.texi,
+	arm-neon-intrinsics.texi, aarch64-acle-intrinsics.texi.
+	* doc/aarch64-acle-intrinsics.texi: Remove.
+	* doc/arm-acle-intrinsics.texi: Remove.
+	* doc/arm-neon-intrinsics.texi: Remove.
+	* doc/extend.texi: Consolidate sections AArch64 intrinsics,
+	ARM NEON Intrinsics, ARM ACLE Intrinsics into one ARM C Language
+	Extension section. Add references to public ACLE specification.
+
+	2014-11-06  Renlin Li  <renlin.li@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_architecture_version): New.
+	(processor): New architecture_version field.
+	(aarch64_override_options): Initialize aarch64_architecture_version.
+	* config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_ARCH,
+	__ARM_ARCH_PROFILE, aarch64_arch_name macro.
+
+	2014-11-04  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix typo in definition
+	of __ARM_FEATURE_IDIV.
+
+	2014-10-22  Jiong Wang <jiong.wang@arm.com>
+
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add missing '\'.
+
+	2014-10-22  Renlin Li <renlin.li@arm.com>
+
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Define
+	__ARM_FEATURE_IDIV__.
+
+	2014-10-15  Renlin Li <renlin.li@arm.com>
+
+	* config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define
+	__ARM_BIG_ENDIAN, __ARM_SIZEOF_MINIMAL_ENUM. Add __ARM_64BIT_STATE,
+	__ARM_ARCH_ISA_A64, __ARM_FEATURE_CLZ, __ARM_FEATURE_IDIV,
+	__ARM_FEATURE_UNALIGNED, __ARM_PCS_AAPCS64, __ARM_SIZEOF_WCHAR_T.
+
+2015-01-13  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r211789, r211790, r211791, r211792, r211793, r211794,
+	r211795, r211796, r211797.
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.c (__gnu_uldivmod_helper): Remove.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi-v6m.S (__aeabi_uldivmod): Perform division using
+	__udivmoddi4.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_ldivmod, __aeabi_uldivmod,
+	push_for_divide, pop_for_divide): Use .cfi_* directives for DWARF
+	annotations. Fix DWARF information.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_ldivmod): Perform division using
+	__udivmoddi4, and fixups for negative operands.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_ldivmod): Optimise stack manipulation.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_uldivmod): Perform division using call
+	to __udivmoddi4.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_uldivmod): Optimise stack pointer
+	manipulation.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_uldivmod, __aeabi_ldivmod): Add comment
+	describing register usage on function entry and exit.
+
+	2014-06-18  Charles Baylis  <charles.baylis@linaro.org>
+
+	* config/arm/bpabi.S (__aeabi_uldivmod): Fix whitespace.
+	(__aeabi_ldivmod): Fix whitespace.
+
+2015-01-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217593.
+	2014-11-14  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64-cores.def (thunderx): Change the scheduler
+	over to thunderx.
+	* config/aarch64/aarch64.md: Include thunderx.md.
+	(generic_sched): Set to no for thunderx.
+	* config/aarch64/thunderx.md: New file.
+
+2015-01-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217717.
+	2014-11-18  Felix Yang  <felix.yang@huawei.com>
+
+	* config/aarch64/aarch64.c (doloop_end): New pattern.
+	* config/aarch64/aarch64.md (TARGET_CAN_USE_DOLOOP_P): Implement.
+
+2015-01-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217661.
+	2014-11-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64-cores.def (cortex-a53): Remove
+	AARCH64_FL_CRYPTO from feature flags.
+	(cortex-a57): Likewise.
+	(cortex-a57.cortex-a53): Likewise.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218319.
+	2014-12-03  Andrew Stubbs  <ams@codesourcery.com>
+
+	Revert:
+
+	2014-09-17  Andrew Stubbs  <ams@codesourcery.com>
+
+	* config/arm/arm.c (arm_option_override): Reject -mfpu=neon
+	when architecture is older than ARMv7.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217691.
+	2014-11-18  Jiong Wang  <jiong.wang@arm.com>
+
+	* lra-eliminations.c (update_reg_eliminate): Relax gcc_assert for fixed
+	registers.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215503.
+	2014-09-23  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* common/config/aarch64/aarch64-common.c:
+	(default_options aarch_option_optimization_table):
+	Default to -fsched-pressure.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211132.
+	2014-06-02  Tom de Vries  <tom@codesourcery.com>
+
+	* config/aarch64/aarch64.c (aarch64_float_const_representable_p): Handle
+	case that x has VOIDmode.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209620.
+	2014-04-22  Vidya Praveen  <vidyapraveen@arm.com>
+
+	* aarch64.md (float<GPI:mode><GPF:mode>2): Remove.
+	(floatuns<GPI:mode><GPF:mode>2): Remove.
+	(<optab><fcvt_target><GPF:mode>2): New pattern for equal width float
+	and floatuns conversions.
+	(<optab><fcvt_iesize><GPF:mode>2): New pattern for inequal width float
+	and floatuns conversions.
+	* iterators.md (fcvt_target, FCVT_TARGET): Support SF and DF modes.
+	(w1,w2): New mode attributes for inequal width conversions.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217362, r217546.
+	2014-11-14  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	PR target/63724
+        * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
+        numerical immediate handling to...
+        (aarch64_internal_mov_immediate): ...this. New.
+        (aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
+        (aarch64_mov_operand_p): Relax predicate.
+        * config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
+        (*movsi_aarch64): Turn into define_insn_and_split and new alternative
+        for 'n'.
+        (*movdi_aarch64): Likewise.
+
+	2014-11-11  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd.md
+	(aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
+	(aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
+	are punning between float vectors and integer vectors.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+	* LINARO-VERSION: Update.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217079, r217080.
+	2014-11-04  Alan Lawrence  <alan.lawrence@arm.com>
+
+	config/arm/neon.md (reduc_smin_<mode> *2): Rename to...
+	(reduc_smin_scal_<mode> *2): ...this; extract scalar result.
+	(reduc_smax_<mode> *2): Rename to...
+	(reduc_smax_scal_<mode> *2): ...this; extract scalar result.
+	(reduc_umin_<mode> *2): Rename to...
+	(reduc_umin_scal_<mode> *2): ...this; extract scalar result.
+	(reduc_umax_<mode> *2): Rename to...
+	(reduc_umax_scal_<mode> *2): ...this; extract scalar result.
+
+	2014-11-04  Alan Lawrence  <alan.lawrence@arm.com>
+
+	config/arm/neon.md (reduc_plus_*): Rename to...
+	(reduc_plus_scal_*): ...this; reduce to temp and extract scalar result.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Fix Backport from trunk r216524 (committed at r218379).
+	Add missing file: config/aarch64/aarch64-cost-tables.h
+
+	* config/aarch64/aarch64-cost-tables.h: New file.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217076.
+	2014-11-04  Michael Collison <michael.collison@linaro.org>
+
+	* config/aarch64/iterators.md (lconst_atomic): New mode attribute
+	to support constraints for CONST_INT in atomic operations.
+	* config/aarch64/atomics.md
+	(atomic_<atomic_optab><mode>): Use lconst_atomic constraint.
+	(atomic_nand<mode>): Likewise.
+	(atomic_fetch_<atomic_optab><mode>): Likewise.
+	(atomic_fetch_nand<mode>): Likewise.
+	(atomic_<atomic_optab>_fetch<mode>): Likewise.
+	(atomic_nand_fetch<mode>): Likewise.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217026.
+	2014-11-03  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	* ifcvt.c (noce_emit_cmove, noce_get_alt_condition, noce_get_condition):
+	Allow CC mode if HAVE_cbranchcc4.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217014.
+	2014-11-02  Michael Collison  <michael.collison@linaro.org>
+
+	* config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO) : Update
+	to support vector modes.
+	(CTZ_DEFINED_VALUE_AT_ZERO): Ditto.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216996, r216998, r216999, r217001, r217002, r217003,
+	r217004, r217742.
+	2014-11-18  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR target/63937
+	* target.def (use_by_pieces_infrastructure_p): Take unsigned
+	HOST_WIDE_INT as the size parameter.
+	* targhooks.c (default_use_by_pieces_infrastructure_p): Likewise.
+	* targhooks.h (default_use_by_pieces_infrastructure_p): Likewise.
+	* config/arc/arc.c (arc_use_by_pieces_infrastructure_p)): Likewise.
+	* config/mips/mips.c (mips_use_by_pieces_infrastructure_p)): Likewise.
+	* config/s390/s390.c (s390_use_by_pieces_infrastructure_p)): Likewise.
+	* config/sh/sh.c (sh_use_by_pieces_infrastructure_p)): Likewise.
+	* config/aarch64/aarch64.c
+	(aarch64_use_by_pieces_infrastructure_p)): Likewise.
+	* doc/tm.texi: Regenerate.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* doc/tm.texi.in (MOVE_BY_PIECES_P): Remove.
+	(CLEAR_BY_PIECES_P): Likewise.
+	(SET_BY_PIECES_P): Likewise.
+	(STORE_BY_PIECES_P): Likewise.
+	* doc/tm.texi: Regenerate.
+	* system.h: Poison MOVE_BY_PIECES_P, CLEAR_BY_PIECES_P,
+	SET_BY_PIECES_P, STORE_BY_PIECES_P.
+	* expr.c (MOVE_BY_PIECES_P): Remove.
+	(CLEAR_BY_PIECES_P): Likewise.
+	(SET_BY_PIECES_P): Likewise.
+	(STORE_BY_PIECES_P): Likewise.
+	(can_move_by_pieces): Rewrite in terms of
+	targetm.use_by_pieces_infrastructure_p.
+	(emit_block_move_hints): Likewise.
+	(can_store_by_pieces): Likewise.
+	(store_by_pieces): Likewise.
+	(clear_storage_hints): Likewise.
+	(emit_push_insn): Likewise.
+	(expand_constructor): Likewise.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_use_by_pieces_infrastructre_p): New.
+	(TARGET_USE_BY_PIECES_INFRASTRUCTURE): Likewise.
+	* config/aarch64/aarch64.h (STORE_BY_PIECES_P): Delete.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/mips/mips.h (MOVE_BY_PIECES_P): Remove.
+	(STORE_BY_PIECES_P): Likewise.
+	* config/mips/mips.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New.
+	(mips_move_by_pieces_p): Rename to...
+	(mips_use_by_pieces_infrastructure_p): ...this, use new hook
+	parameters, use the default hook implementation as a
+	fall-back.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/sh/sh.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New.
+	(sh_use_by_pieces_infrastructure_p): Likewise.
+	* config/sh/sh.h (MOVE_BY_PIECES_P): Remove.
+	(STORE_BY_PIECES_P): Likewise.
+	(SET_BY_PIECES_P): Likewise.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arc/arc.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New.
+	(arc_use_by_pieces_infrastructure_p): Likewise.
+	* confir/arc/arc.h (MOVE_BY_PIECES_P): Delete.
+	(CAN_MOVE_BY_PIECES): Likewise.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/s390/s390.c (s390_use_by_pieces_infrastructure_p): New.
+	(TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Likewise.
+	* config/s390/s390.h (MOVE_BY_PIECES_P): Remove.
+	(CLEAR_BY_PIECES): Likewise.
+	(SET_BY_PIECES): Likewise.
+	(STORE_BY_PIECES): Likewise.
+
+	2014-11-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* target.def (use_by_pieces_infrastructure_p): New.
+	* doc/tm.texi.in (MOVE_BY_PIECES_P): Describe that this macro
+	is deprecated.
+	(STORE_BY_PIECES_P): Likewise.
+	(CLEAR_BY_PIECES_P): Likewise.
+	(SET_BY_PIECES_P): Likewise.
+	(TARGET_MOVE_BY_PIECES_PROFITABLE_P): Add hook.
+	* doc/tm.texi: Regenerate.
+	* expr.c (MOVE_BY_PIECES_P): Rewrite in terms of
+	TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.
+	(STORE_BY_PIECES_P): Likewise.
+	(CLEAR_BY_PIECES_P): Likewise.
+	(SET_BY_PIECES_P): Likewise.
+	(STORE_MAX_PIECES): Move to...
+	* defaults.h (STORE_MAX_PIECES): ...here.
+	* targhooks.c (get_move_ratio): New.
+	(default_use_by_pieces_infrastructure_p): Likewise.
+	* targhooks.h (default_use_by_pieces_infrastructure_p): New.
+	* target.h (by_pieces_operation): New.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216765.
+	2014-10-27  Jiong Wang <jiong.wang@arm.com>
+
+	PR target/63442
+	* optabs.c (prepare_cmp_insn): Use "ret_mode" instead of "word_mode".
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216630.
+	2014-10-24  Felix Yang  <felix.yang@huawei.com>
+	Jiji Jiang  <jiangjiji@huawei.com>
+
+	PR target/63173
+	* config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
+	(__LD3R_FUNC): Ditto.
+	(__LD4R_FUNC): Ditto.
+	(vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
+	 vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
+	 vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8, 
+	 vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64, 
+	 vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64 
+	 vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
+	(vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8 
+	 vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32 
+	 vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
+	 vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16 
+	 vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16 
+	 vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
+	(vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8 
+	 vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32 
+	 vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32 
+	 vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16 
+	 vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16 
+	 vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
+	* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
+	UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
+	* config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
+	builtins.
+	* config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
+	(aarch64_simd_ld3r<mode>): Likewise.
+	(aarch64_simd_ld4r<mode>): Likewise.
+	(aarch64_ld2r<mode>): New expand.
+	(aarch64_ld3r<mode>): Likewise.
+	(aarch64_ld4r<mode>): Likewise.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217971.
+	2014-11-22  Uros Bizjak  <ubizjak@gmail.com>
+
+	* params.def (PARAM_MAX_COMPLETELY_PEELED_INSNS): Increase to 200.
+	* config/i386/i386.c (ix86_option_override_internal): Do not increase
+	PARAM_MAX_COMPLETELY_PEELED_INSNS.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216524.
+	2014-10-21  Andrew Pinski  <apinski@cavium.com>
+
+	* doc/invoke.texi (AARCH64/mtune): Document thunderx as an
+	available option also.
+	* config/aarch64/aarch64-cost-tables.h: New file.
+	* config/aarch64/aarch64-cores.def (thunderx): New core.
+	* config/aarch64/aarch64-tune.md: Regenerate.
+	* config/aarch64/aarch64.c: Include aarch64-cost-tables.h instead
+	of config/arm/aarch-cost-tables.h.
+	(thunderx_regmove_cost): New variable.
+	(thunderx_tunings): New variable.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216336.
+	2014-10-16  Richard Earnshaw  <rearnsha@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_legitimize_address): New function.
+	(TARGET_LEGITIMIZE_ADDRESS): Redefine.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216253.
+	2014-10-15  Renlin Li <renlin.li@arm.com>
+
+	* config/aarch64/aarch64.h (ARM_DEFAULT_PCS, arm_pcs_variant): Delete.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215711.
+	2014-09-30  Terry Guo  <terry.guo@arm.com>
+
+	* config/arm/arm-cores.def (cortex-m7): New core name.
+	* config/arm/arm-fpus.def (fpv5-sp-d16): New fpu name.
+	(fpv5-d16): Ditto.
+	* config/arm/arm-tables.opt: Regenerated.
+	* config/arm/arm-tune.md: Regenerated.
+	* config/arm/arm.h (TARGET_VFP5): New macro.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Include cortex-m7.
+	* config/arm/vfp.md (<vrint_pattern><SDF:mode>2,
+	smax<mode>3, smin<mode>3): Enabled for FPU FPv5.
+	* doc/invoke.texi: Document new cpu and fpu names.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215707, r215842.
+	2014-10-03  David Sherwood  <david.sherwood@arm.com>
+
+	* ira-int.h (ira_allocno): Mark hard_regno as signed.
+
+	2014-09-30  David Sherwood  <david.sherwood@arm.com>
+
+	* ira-int.h (ira_allocno): Add "wmode" field.
+	* ira-build.c (create_insn_allocnos): Add new "parent" function
+	parameter.
+	* ira-conflicts.c (ira_build_conflicts): Add conflicts for registers
+	that cannot be accessed in wmode.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215540.
+	2014-09-24  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	PR rtl-optimization/63210
+	* ira-color.c (assign_hard_reg): Ignore conflict cost if the
+	HARD_REGNO is not available for CONFLICT_A.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215046.
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/61749
+	* config/aarch64/aarch64-builtins.c (aarch64_types_quadop_qualifiers):
+	Use qualifier_immediate for last operand.  Rename to...
+	(aarch64_types_ternop_lane_qualifiers): ... This.
+	(TYPES_QUADOP): Rename to...
+	(TYPES_TERNOP_LANE): ... This.
+	(aarch64_simd_expand_args): Return const0_rtx when encountering user
+	error.  Change return of 0 to return of NULL_RTX.
+	(aarch64_crc32_expand_builtin): Likewise.
+	(aarch64_expand_builtin): Return NULL_RTX instead of 0.
+	ICE when expanding unknown builtin.
+	* config/aarch64/aarch64-simd-builtins.def (sqdmlal_lane): Use
+	TERNOP_LANE qualifiers.
+	(sqdmlsl_lane): Likewise.
+	(sqdmlal_laneq): Likewise.
+	(sqdmlsl_laneq): Likewise.
+	(sqdmlal2_lane): Likewise.
+	(sqdmlsl2_lane): Likewise.
+	(sqdmlal2_laneq): Likewise.
+	(sqdmlsl2_laneq): Likewise.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215013.
+	2014-09-08  Joseph Myers  <joseph@codesourcery.com>
+
+	* defaults.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO):
+	Remove.
+	* doc/tm.texi.in (ROUND_TOWARDS_ZERO, LARGEST_EXPONENT_IS_NORMAL):
+	Remove.
+	* doc/tm.texi: Regenerate.
+	* system.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO):
+	Poison.
+	* config/arm/arm.h (LARGEST_EXPONENT_IS_NORMAL): Remove.
+	* config/cris/cris.h (__make_dp): Remove.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214952.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/arm_neon.h (__GET_HIGH): New macro.
+	(vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16,
+	vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64,
+	vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64):
+	Remove temporary __asm__ and reimplement.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214948, r214949.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code
+	handling cmge, cmgt, cmeq, cmtst.
+
+	* config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle,
+	cmlt, cmgeu, cmgtu, cmtst): Remove.
+
+	* config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*,
+	vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*,
+	vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*,
+	vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions.
+
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
+	TYPES_TST): Define.
+	(aarch64_fold_builtin): Update pattern for cmtst.
+
+	* config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p):
+	Declare.
+
+	* config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers.
+
+	* config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>):
+	Switch operands, separate out more cases, refactor.
+
+	(aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1).
+
+	* config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
+	argument; rename old version to...
+	(aarch64_const_vec_all_same_in_range_p): ...this.
+	(aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.
+
+	* config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214008.
+	2014-08-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Move
+	one_match > zero_match case to just before simple_sequence.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213382.
+	2014-07-31  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/arm_neon.h (vpadd_<suf><8,16,32,64>): Move to
+	correct alphabetical position.
+	(vpaddd_f64): Rewrite using builtins.
+	(vpaddd_s64): Move to correct alphabetical position.
+	(vpaddd_u64): New.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210735, r215206, r215207, r215208.
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table
+	for A57.
+	(cortexa53_regmove_cost): New cost table for A53.  Increase GP2FP/FP2GP
+	cost to spilling from integer to FP registers.
+
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register
+	move handling.
+	(generic_regmove_cost): Undo raised FP2FP move cost as Q register moves
+	are now handled correctly.
+
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost
+	handling of CALLER_SAVE_REGS and POINTER_REGS.
+
+	2014-05-22  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/aarch64/aarch64.c (aarch64_regno_regclass) : Change CORE_REGS
+	to GENERAL_REGS.
+	(aarch64_secondary_reload) : LikeWise.
+	(aarch64_class_max_nregs) : Remove CORE_REGS.
+	* config/aarch64/aarch64.h (enum reg_class) : Remove CORE_REGS.
+	(REG_CLASS_NAMES) : Likewise.
+	(REG_CLASS_CONTENTS) : LikeWise.
+	(INDEX_REG_CLASS) : Change CORE_REGS to GENERAL_REGS.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+	* LINARO-VERSION: Update.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Add Linaro release macros (Linaro only patch.)
+
+	* Makefile.in (LINAROVER, LINAROVER_C, LINAROVER_S): Define.
+	(CFLAGS-cppbuiltin.o): Add LINAROVER macro definition.
+	(cppbuiltin.o): Depend on $(LINAROVER).
+	* cppbuiltin.c (parse_linarover): New.
+	(define_GNUC__): Define __LINARO_RELEASE__ and __LINARO_SPIN__ macros.
+
+2014-11-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216229, r216230.
+	2014-10-14  Andrew Pinski  <apinski@cavium.com>
+
+	* explow.c (convert_memory_address_addr_space): Rename to ...
+	(convert_memory_address_addr_space_1): This.  Add in_const argument.
+	Inside a CONST RTL, permute the conversion and addition of constant
+	for zero and sign extended pointers.
+	(convert_memory_address_addr_space): New function.
+
+	2014-10-14  Andrew Pinski  <apinski@cavium.com>
+
+	Revert:
+	2011-08-19  H.J. Lu  <hongjiu.lu@intel.com>
+
+        PR middle-end/49721
+        * explow.c (convert_memory_address_addr_space): Also permute the
+        conversion and addition of constant for zero-extend.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+	* LINARO-VERSION: Update.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+	* LINARO-VERSION: Update.
+
+2014-10-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	Revert:
+	2014-10-08  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215206, r215207, r215208.
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table
+	for A57.
+	(cortexa53_regmove_cost): New cost table for A53.  Increase GP2FP/FP2GP
+	cost to spilling from integer to FP registers.
+
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register
+	move handling.
+	(generic_regmove_cost): Undo raised FP2FP move cost as Q register moves
+	are now handled correctly.
+
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost
+	handling of CALLER_SAVE_REGS and POINTER_REGS.
+
+2014-10-08  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214825, r214826.
+	2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/62275
+	* config/arm/neon.md
+	(neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
+	<v_cmp_result>): New pattern.
+	* config/arm/iterators.md (NEON_VCVT): New int iterator.
+	* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
+	vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
+	vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
+	* config/arm/arm.c (arm_builtin_vectorized_function): Handle
+	BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.
+
+	2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/62275
+	* config/arm/iterators.md (FIXUORS): New code iterator.
+	(VCVT): New int iterator.
+	(su_optab): New code attribute.
+	(su): Likewise.
+	* config/arm/vfp.md (l<vrint_pattern><su_optab><mode>si2): New pattern.
+
+2014-10-08  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215471.
+	2014-09-22  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/geniterators.sh: New.
+	* config/aarch64/iterators.md (VDQF_DF): New.
+	* config/aarch64/t-aarch64: Generate aarch64-builtin-iterators.h.
+	* config/aarch64/aarch64-builtins.c (BUILTIN_*) Remove.
+
+2014-10-08  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215206, r215207, r215208.
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table
+	for A57.
+	(cortexa53_regmove_cost): New cost table for A53.  Increase GP2FP/FP2GP
+	cost to spilling from integer to FP registers.
+
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register
+	move handling.
+	(generic_regmove_cost): Undo raised FP2FP move cost as Q register moves
+	are now handled correctly.
+
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost
+	handling of CALLER_SAVE_REGS and POINTER_REGS.
+
+2014-10-07  Yvan Roux  <yvan.roux@linaro.org>
+ 
+	Backport from trunk r214824.
+	2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/predicates.md (aarch64_comparison_operation):
+	New special predicate.
+	* config/aarch64/aarch64.md (*csinc2<mode>_insn): Use
+	aarch64_comparison_operation instead of matching an operator.
+	Update operand numbers.
+	(csinc3<mode>_insn): Likewise.
+	(*csinv3<mode>_insn): Likewise.
+	(*csneg3<mode>_insn): Likewise.
+	(ffs<mode>2): Update gen_csinc3<mode>_insn callsite.
+	* config/aarch64/aarch64.c (aarch64_get_condition_code):
+	Return -1 instead of aborting on invalid condition codes.
+	(aarch64_print_operand): Update aarch64_get_condition_code callsites
+	to assert that the returned condition code is valid.
+	* config/aarch64/aarch64-protos.h (aarch64_get_condition_code): Export.
+
+2014-10-07  Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
+
+	Backport from trunk r209643, r211881.
+	2014-06-22  Richard Henderson  <rth@redhat.com>
+
+	PR target/61565
+	* compare-elim.c (struct comparison): Add eh_note.
+	(find_comparison_dom_walker::before_dom_children): Don't eliminate
+	a redundant comparison in a different EH region.  Purge EH edges if
+	necessary.
+
+	2014-04-22  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define.
+
+2014-10-06  Charles Baylis  <charles.baylis@linaro.org>
+
+	Backport from trunk r214945.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Replace
+	varargs with pointer parameter.
+	(aarch64_simd_expand_builtin): pass pointer into previous.
+
+2014-10-06  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+	Backport from trunk r214944.
+	2014-09-05  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/cortex-a53.md (cortex_a53_alu_shift): Add alu_ext,
+	alus_ext.
+
+2014-10-06  Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
+
+	Backport from trunk r214943.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
+	* config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
+	* config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8):
+	Replace temporary asm with call to builtin.
+	(vrbit_p8, vrbitq_p8): New functions.
+
+2014-10-06  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r214886.
+	2014-09-03  Richard Henderson  <rth@redhat.com>
+
+	* config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove.
+	(aarch64_popwb_pair_reg): Remove.
+	(aarch64_set_frame_expr): Remove.
+	(aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with
+	the restore ops performed by the insns generated.
+	(aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation
+	insn.  Perform the calls_eh_return addition later; do not attempt to
+	preserve the CFA in that case.  Don't use aarch64_set_frame_expr.
+	(aarch64_expand_prologue): Use REG_CFA_ADJUST_CFA directly, or no
+	special markup at all.  Load cfun->machine->frame.hard_fp_offset
+	into a local variable.
+	(aarch64_frame_pointer_required): Don't check calls_alloca.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215385.
+	2014-09-19  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.md (stack_protect_test_<mode>): Mark
+	scratch register as written.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215346.
+	2014-09-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/neon.md (*movmisalign<mode>_neon_load): Change type
+	to neon_load1_1reg<q>.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215321.
+	2014-09-17  Andrew Stubbs  <ams@codesourcery.com>
+
+	* config/arm/arm.c (arm_option_override): Reject -mfpu=neon
+	when architecture is older than ARMv7.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215260.
+	2014-09-14  David Sherwood  <david.sherwood@arm.com>
+
+	* gcc.target/aarch64/vdup_lane_2.c (force_simd): Emit simd mov.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215205.
+	2014-09-12  Wilco Dijkstra  <wilco.dijkstra@arm.com>
+
+	* gcc/ree.c (combine_reaching_defs): Ensure inserted copy don't change
+	the number of hard registers.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215136.
+	2014-09-10  Xinliang David Li  <davidxl@google.com>
+
+	PR target/63209
+	* config/arm/arm.md (movcond_addsi): Handle case where source
+	and target operands are the same.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215086.
+	2014-09-09  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+	Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	 * config/aarch64/aarch64-elf-raw.h (ENDFILE_SPEC): Add crtfastmath.o.
+         * config/aarch64/aarch64-linux.h (GNU_USER_TARGET_MATH_ENDFILE_SPEC):
+	Define.
+        (ENDFILE_SPEC): Define and use GNU_USER_TARGET_MATH_ENDFILE_SPEC.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215067.
+	2014-09-09  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/arm/arm.c (NEON_COPYSIGNF): New enum.
+	(arm_init_neon_builtins): Support NEON_COPYSIGNF.
+	(arm_builtin_vectorized_function): Likewise.
+	* config/arm/arm_neon_builtins.def: New macro for copysignf.
+	* config/arm/neon.md (neon_copysignf<mode>): New pattern for vector
+	copysignf.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215050, r215051, r215052, r215053, r215054,
+	r215055, r215056.
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.md (vfp_pop_multiple_with_writeback): Use vldm
+	mnemonic instead of fldmfdd.
+	* config/arm/arm.c (vfp_output_fstmd): Rename to...
+	(vfp_output_vstmd): ... This.  Convert output to UAL syntax.
+	Output vpush when address register is SP.
+	* config/arm/arm-protos.h (vfp_output_fstmd): Rename to...
+	(vfp_output_vstmd): ... This.
+	* config/arm/vfp.md (push_multi_vfp): Update call to
+	vfp_output_vstmd.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/vfp.md (*movcc_vfp): Use UAL syntax.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/vfp.md (*sqrtsf2_vfp): Use UAL assembly syntax.
+	(*sqrtdf2_vfp): Likewise.
+	(*cmpsf_vfp): Likewise.
+	(*cmpsf_trap_vfp): Likewise.
+	(*cmpdf_vfp): Likewise.
+	(*cmpdf_trap_vfp): Likewise.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/vfp.md (*extendsfdf2_vfp): Use UAL assembly syntax.
+	(*truncdfsf2_vfp): Likewise.
+	(*truncsisf2_vfp): Likewise.
+	(*truncsidf2_vfp): Likewise.
+	(fixuns_truncsfsi2): Likewise.
+	(fixuns_truncdfsi2): Likewise.
+	(*floatsisf2_vfp): Likewise.
+	(*floatsidf2_vfp): Likewise.
+	(floatunssisf2): Likewise.
+	(floatunssidf2): Likewise.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/vfp.md (*mulsf3_vfp): Use UAL assembly syntax.
+	(*muldf3_vfp): Likewise.
+	(*mulsf3negsf_vfp): Likewise.
+	(*muldf3negdf_vfp): Likewise.
+	(*mulsf3addsf_vfp): Likewise.
+	(*muldf3adddf_vfp): Likewise.
+	(*mulsf3subsf_vfp): Likewise.
+	(*muldf3subdf_vfp): Likewise.
+	(*mulsf3negsfaddsf_vfp): Likewise.
+	(*fmuldf3negdfadddf_vfp): Likewise.
+	(*mulsf3negsfsubsf_vfp): Likewise.
+	(*muldf3negdfsubdf_vfp): Likewise.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/vfp.md (*abssf2_vfp): Use UAL assembly syntax.
+	(*absdf2_vfp): Likewise.
+	(*negsf2_vfp): Likewise.
+	(*negdf2_vfp): Likewise.
+	(*addsf3_vfp): Likewise.
+	(*adddf3_vfp): Likewise.
+	(*subsf3_vfp): Likewise.
+	(*subdf3_vfp): Likewise.
+	(*divsf3_vfp): Likewise.
+	(*divdf3_vfp): Likewise.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (output_move_vfp): Use UAL syntax for load/store
+	multiple.
+	(arm_print_operand): Don't convert real values to decimal
+	representation in default case.
+	(fp_immediate_constant): Delete.
+	* config/arm/arm-protos.h (fp_immediate_constant): Likewise.
+	* config/arm/vfp.md (*arm_movsi_vfp): Convert to VFP moves to UAL
+	syntax.
+	(*thumb2_movsi_vfp): Likewise.
+	(*movdi_vfp): Likewise.
+	(*movdi_vfp_cortexa8): Likewise.
+	(*movhf_vfp_neon): Likewise.
+	(*movhf_vfp): Likewise.
+	(*movsf_vfp): Likewise.
+	(*thumb2_movsf_vfp): Likewise.
+	(*movdf_vfp): Likewise.
+	(*thumb2_movdf_vfp): Likewise.
+	(*movsfcc_vfp): Likewise.
+	(*thumb2_movsfcc_vfp): Likewise.
+	(*movdfcc_vfp): Likewise.
+	(*thumb2_movdfcc_vfp): Likewise.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214959.
+	2014-09-05  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/cortex-a53.md (cortex_a53_fpalu): Add f_rints, f_rintd,
+	f_minmaxs, f_minmaxd types.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214947.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
+	Remove qualifier_const_pointer, update comment.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214940.
+	2014-09-05  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.md (sibcall_value_insn): Give operand 1
+	DImode.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213090.
+	2014-07-26  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64.md (*extr_insv_lower_reg<mode>): Remove +
+	from the read only register.
+
+2014-09-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+	* LINARO-VERSION: Update.
+
+2014-09-09  Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
+
+	Backport from trunk r215004.
+	2014-09-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
+
+	PR target/63190
+	* config/aarch64/aarch64.md (stack_protect_test_<mode>) Add register
+	constraint for operand0 and remove write only modifier from operand3.
+
+2014-09-09  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r212178
+	2014-06-30  Joseph Myers  <joseph@codesourcery.com>
+
+	* var-tracking.c (add_stores): Return instead of asserting if old
+	and new values for conditional store are the same.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Revert:
+	2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213712.
+	2014-08-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.md (absdi2): Set simd attribute.
+	(aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT.
+	(aarch64_movdi_<mode>high): Likewise.
+	(aarch64_mov<mode>high_di): Likewise.
+	(aarch64_movdi_<mode>low): Likewise.
+	(aarch64_mov<mode>low_di): Likewise.
+	(aarch64_movtilow_tilow): Likewise.
+	Add comment explaining usage of fp,simd attributes and of
+	TARGET_FLOAT and TARGET_SIMD.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213712.
+	2014-08-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.md (absdi2): Set simd attribute.
+	(aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT.
+	(aarch64_movdi_<mode>high): Likewise.
+	(aarch64_mov<mode>high_di): Likewise.
+	(aarch64_movdi_<mode>low): Likewise.
+	(aarch64_mov<mode>low_di): Likewise.
+	(aarch64_movtilow_tilow): Likewise.
+	Add comment explaining usage of fp,simd attributes and of
+	TARGET_FLOAT and TARGET_SIMD.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214526.
+	2014-08-26  Joseph Myers  <joseph@codesourcery.com>
+
+	PR target/60606
+	PR target/61330
+	* varasm.c (make_decl_rtl): Clear DECL_ASSEMBLER_NAME and
+	DECL_HARD_REGISTER and return for invalid register specifications.
+	* cfgexpand.c (expand_one_var): If expand_one_hard_reg_var clears
+	DECL_HARD_REGISTER, call expand_one_error_var.
+	* config/arm/arm.c (arm_hard_regno_mode_ok): Do not allow
+	CC_REGNUM with non-MODE_CC modes.
+	(arm_regno_class): Return NO_REGS for PC_REGNUM.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214503.
+	2014-08-26  Evandro Menezes <e.menezes@samsung.com>
+
+	* config/arm/aarch64/aarch64.c (generic_addrcost_table): Delete
+	qi cost; add di cost.
+	(cortexa57_addrcost_table): Likewise.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213659.
+	2014-08-06  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_evpc_dup): Enable for bigendian.
+	(aarch64_expand_vec_perm_const): Check for dup before zip.
+
+2014-09-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213651.
+	2014-08-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_classify_address): Use REG_P and
+	CONST_INT_P instead of GET_CODE and compare.
+	(aarch64_select_cc_mode): Likewise.
+	(aarch64_print_operand): Likewise.
+	(aarch64_rtx_costs): Likewise.
+	(aarch64_simd_valid_immediate): Likewise.
+	(aarch64_simd_check_vect_par_cnst_half): Likewise.
+	(aarch64_simd_emit_pair_result_insn): Likewise.
+
+2014-08-29  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212978.
+	2014-07-24  Andreas Schwab  <schwab@suse.de>
+
+	* lib/target-supports.exp (check_effective_target_arm_nothumb):
+	Also check for __arm__.
+
+2014-08-29  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Fix backport from trunk 211440:
+	* config.gcc (aarch64*-*-*): Restore need_64bit_hwint=yes.
+
+	This is necessary to build aarch64* compilers on i686 host.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213627.
+	2014-08-05  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_simd_builtin_type_mode): Delete.
+	(v8qi_UP): Remap to V8QImode.
+	(v4hi_UP): Remap to V4HImode.
+	(v2si_UP): Remap to V2SImode.
+	(v2sf_UP): Remap to V2SFmode.
+	(v1df_UP): Remap to V1DFmode.
+	(di_UP): Remap to DImode.
+	(df_UP): Remap to DFmode.
+	(v16qi_UP):V16QImode.
+	(v8hi_UP): Remap to V8HImode.
+	(v4si_UP): Remap to V4SImode.
+	(v4sf_UP): Remap to V4SFmode.
+	(v2di_UP): Remap to V2DImode.
+	(v2df_UP): Remap to V2DFmode.
+	(ti_UP): Remap to TImode.
+	(ei_UP): Remap to EImode.
+	(oi_UP): Remap to OImode.
+	(ci_UP): Map to CImode.
+	(xi_UP): Remap to XImode.
+	(si_UP): Remap to SImode.
+	(sf_UP): Remap to SFmode.
+	(hi_UP): Remap to HImode.
+	(qi_UP): Remap to QImode.
+	(aarch64_simd_builtin_datum): Make mode a machine_mode.
+	(VAR1): Build builtin name.
+	(aarch64_init_simd_builtins): Remove dead code.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213713.
+	2014-08-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.md (*cmov<mode>): Set type attribute to fcsel.
+	* config/arm/types.md (f_sels, f_seld): Delete.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213711.
+	2014-08-07  Ian Bolton  <ian.bolton@arm.com>
+		    Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
+	Use MOVN when one of the half-words is 0xffff.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213632.
+	2014-08-05  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/cortex-a15.md (cortex_a15_alu_shift): Add crc type
+	to reservation.
+	* config/arm/cortex-a53.md (cortex_a53_alu_shift): Likewise.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213630.
+	2014-08-05  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.md (clzsi2): Set predicable_short_it attr to no.
+	(rbitsi2): Likewise.
+	(*arm_rev): Set predicable and predicable_short_it attributes.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213557.
+	2014-08-04  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+		    James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* doc/md.texi (clrsb): Document.
+	(clz): Change reference to x into operand 1.
+	(ctz): Likewise.
+	(popcount): Likewise.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213551, r213556.
+	2014-08-04  Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
+		    Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* sched-deps.c (try_group_insn): Generalise macro fusion hook usage
+	to any two insns.  Update comment.  Rename to sched_macro_fuse_insns.
+	(sched_analyze_insn): Update use of try_group_insn to
+	sched_macro_fuse_insns.
+	* config/i386/i386.c (ix86_macro_fusion_pair_p): Reject 2nd
+	arguments that are not conditional jumps.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213490.
+	2014-08-01  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def (dup_lane, get_lane): Delete.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213488.
+	2014-08-01  Jiong Wang <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_classify_address): Accept all offset
+	for frame access when strict_p is false.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213485, r213486, r213487.
+	2014-08-01  Renlin Li <renlin.li@arm.com>
+		    Jiong Wang <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (offset_7bit_signed_scaled_p): Rename to
+	aarch64_offset_7bit_signed_scaled_p, remove static and use it.
+	* config/aarch64/aarch64-protos.h (aarch64_offset_7bit_signed_scaled_p):
+	Declaration.
+	* config/aarch64/predicates.md (aarch64_mem_pair_offset): Define new
+	predicate.
+	* config/aarch64/aarch64.md (loadwb_pair, storewb_pair): Use
+	aarch64_mem_pair_offset.
+
+	2014-08-01  Jiong Wang <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Fix
+	offset.
+	(loadwb_pair<GPI:mode>_<P:mode>): Likewise.
+	* config/aarch64/aarch64.c (aarch64_gen_loadwb_pair): Likewise.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213379.
+	2014-07-31  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_gimple_fold_builtin): Don't fold reduction operations for
+	BYTES_BIG_ENDIAN.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213378.
+	2014-07-31  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Vary
+	the generated mask based on BYTES_BIG_ENDIAN.
+	(aarch64_simd_check_vect_par_cnst_half): New.
+	* config/aarch64/aarch64-protos.h
+	(aarch64_simd_check_vect_par_cnst_half): New.
+	* config/aarch64/predicates.md (vect_par_cnst_hi_half): Refactor
+	the check out to aarch64_simd_check_vect_par_cnst_half.
+	(vect_par_cnst_lo_half): Likewise.
+	* config/aarch64/aarch64-simd.md
+	(aarch64_simd_move_hi_quad_<mode>): Always use vec_par_cnst_lo_half.
+	(move_hi_quad_<mode>): Always generate a low mask.
+
+2014-08-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212927, r213304.
+	2014-07-30  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/arm/arm.c (arm_get_frame_offsets): Adjust condition for
+	Thumb2.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/arm/arm.c (arm_get_frame_offsets): If both r3 and other
+	callee-saved registers are available for padding purpose
+	and r3 is not mandatory, then prefer use those callee-saved
+	instead of r3.
+
+2014-08-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211717, r213692.
+	2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/arm/arm.c (bdesc_2arg): Fix typo.
+	(arm_atomic_assign_expand_fenv): Remove The default implementation.
+
+	2014-06-17  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/arm/arm.c (arm_atomic_assign_expand_fenv): call
+	default_atomic_assign_expand_fenv for !TARGET_HARD_FLOAT.
+	(arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and
+	__builtins_arm_get_fpscr only when TARGET_HARD_FLOAT.
+	* config/arm/vfp.md (set_fpscr): Make pattern conditional on
+	TARGET_HARD_FLOAT.
+	(get_fpscr) : Likewise.
+
+2014-08-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212989, r213628.
+	2014-08-05  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* convert.c (convert_to_integer): Guard transformation to lrint by
+	-fno-math-errno.
+
+	2014-07-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR middle-end/61876
+	* convert.c (convert_to_integer): Do not convert BUILT_IN_ROUND and cast
+	when flag_errno_math is on.
+
+2014-08-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+	* LINARO-VERSION: Update.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212912, r212913.
+	2014-07-22  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle CLRSB, CLZ.
+	(case UNSPEC): Handle UNSPEC_RBIT.
+
+	2014-07-22  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.md: Delete UNSPEC_CLS.
+	(clrsb<mode>2): Use clrsb RTL code instead of UNSPEC_CLS.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213555.
+	2014-08-04  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/61713
+	* gcc/optabs.c (expand_atomic_test_and_set): Do not try to emit
+	move to subtarget in serial version if result is ignored.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213376.
+	2014-07-31  Charles Baylis  <charles.baylis@linaro.org>
+
+	PR target/61948
+	* config/arm/neon.md (ashldi3_neon): Don't emit arm_ashldi3_1bit unless
+	constraints are satisfied.
+	(<shift>di3_neon): Likewise.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211270, r211271, r211273, r211275, r212943,
+	r212945, r212946, r212947, r212949, r212950, r212951, r212952, r212954,
+	r212955, r212956, r212957, r212958, r212976, r212996, r212997, r212999,
+	r213000.
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_popwb_single_reg): New function.
+	(aarch64_expand_epilogue): Optimize epilogue when !frame_pointer_needed.
+
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function.
+	(aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed.
+
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_restore_callee_saves)
+	(aarch64_save_callee_saves): New parameter "skip_wb".
+	(aarch64_expand_prologue, aarch64_expand_epilogue): Update call site.
+
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.h (frame): New fields "wb_candidate1" and
+	"wb_candidate2".
+	* config/aarch64/aarch64.c (aarch64_layout_frame): Initialize above.
+
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't
+	subtract outgoing area size when restoring stack_pointer_rtx.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_popwb_pair_reg)
+	(aarch64_gen_loadwb_pair): New helper function.
+	(aarch64_expand_epilogue): Simplify code using new helper functions.
+	* config/aarch64/aarch64.md (loadwb_pair<GPF:mode>_<P:mode>): Define.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_pushwb_pair_reg)
+	(aarch64_gen_storewb_pair): New helper function.
+	(aarch64_expand_prologue): Simplify code using new helper functions.
+	* config/aarch64/aarch64.md (storewb_pair<GPF:mode>_<P:mode>): Define.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.md: (aarch64_save_or_restore_callee_saves):
+	Rename to aarch64_save_callee_saves, remove restore code.
+	(aarch64_restore_callee_saves): New function.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Deleted.
+	(aarch64_save_callee_saves): New function to handle reg save
+	for both core and vectore regs.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_gen_load_pair)
+	(aarch64_gen_store_pair): New helper function.
+	(aarch64_save_or_restore_callee_save_registers)
+	(aarch64_save_or_restore_fprs): Use new helper functions.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_next_callee_save): New function.
+	(aarch64_save_or_restore_callee_save_registers)
+	(aarch64_save_or_restore_fprs): Use aarch64_next_callee_save.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_save_or_restore_callee_save_registers)
+	(aarch64_save_or_restore_fprs): Hoist calculation of register rtx.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_save_or_restore_callee_save_registers)
+	(aarch64_save_or_restore_fprs): Remove 'increment'.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_save_or_restore_callee_save_registers)
+	(aarch64_save_or_restore_fprs): Use register offset in
+	cfun->machine->frame.reg_offset.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_save_or_restore_callee_save_registers)
+	(aarch64_save_or_restore_fprs): Remove base_rtx.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_save_or_restore_callee_save_registers): Rename 'offset'
+	to 'start_offset'.  Remove local variable 'start_offset'.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Change
+	type to HOST_WIDE_INT.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_expand_prologue)
+	(aarch64_save_or_restore_fprs)
+	(aarch64_save_or_restore_callee_save_registers): GNU-Stylize code.
+
+	2014-06-05  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* config/aarch64/aarch64.h (aarch64_frame): Add hard_fp_offset and
+	frame_size.
+	* config/aarch64/aarch64.c (aarch64_layout_frame): Initialize
+	aarch64_frame hard_fp_offset and frame_size.
+	(aarch64_expand_prologue): Use aarch64_frame hard_fp_offset and
+	frame_size; remove original_frame_size.
+	(aarch64_expand_epilogue, aarch64_final_eh_return_addr): Likewise.
+	(aarch64_initial_elimination_offset): Remove frame_size and
+	offset.  Use aarch64_frame frame_size.
+
+	2014-06-05  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+		    Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_layout_frame): Correct
+	initialization of R30 offset.  Update offset.  Iterate core
+	regisers upto X30.  Remove X29, X30 specific code.
+
+	2014-06-05  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+		    Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (SLOT_NOT_REQUIRED, SLOT_REQUIRED): Define.
+	(aarch64_layout_frame): Use SLOT_NOT_REQUIRED and SLOT_REQUIRED.
+	(aarch64_register_saved_on_entry): Adjust test.
+
+	2014-06-05  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* config/aarch64/aarch64.h (machine_function): Move
+	saved_varargs_size from here...
+	(aarch64_frameGTY): ... to here.
+
+	* config/aarch64/aarch64.c (aarch64_expand_prologue)
+	(aarch64_expand_epilogue, aarch64_final_eh_return_addr)
+	(aarch64_initial_elimination_offset)
+	(aarch64_setup_incoming_varargs): Adjust location of
+	saved_varargs_size.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212753.
+	2014-07-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_frint_unspec_p): New function.
+	(aarch64_rtx_costs): Handle FIX, UNSIGNED_FIX, UNSPEC.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212752.
+	2014-07-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type.
+	(vmlal_high_lane_s32): Likewise.
+	(vmlal_high_lane_u16): Likewise.
+	(vmlal_high_lane_u32): Likewise.
+	(vmlsl_high_lane_s16): Likewise.
+	(vmlsl_high_lane_s32): Likewise.
+	(vmlsl_high_lane_u16): Likewise.
+	(vmlsl_high_lane_u32): Likewise.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212512.
+	2014-07-14  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/cortex-a15.md (cortex_a15_alu): Handle clz, rbit.
+	* config/arm/cortex-a5.md (cortex_a5_alu): Likewise.
+	* config/arm/cortex-a53.md (cortex_a53_alu): Likewise.
+	* config/arm/cortex-a7.md (cortex_a7_alu_reg): Likewise.
+	* config/arm/cortex-a9.md (cortex_a9_dp): Likewise.
+	* config/arm/cortex-m4.md (cortex_m4_alu): Likewise.
+	* config/arm/cortex-r4.md (cortex_r4_alu): Likewise.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212358.
+	2014-07-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (cortexa5_extra_costs): New table.
+	(arm_cortex_a5_tune): Use cortexa5_extra_costs.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212296.
+	2014-07-04  Tom de Vries  <tom@codesourcery.com>
+
+	* config/aarch64/aarch64-simd.md
+	(define_insn "vec_unpack_trunc_<mode>"): Fix constraint.
+
+2014-08-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212142, r212225.
+	2014-07-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Delete unused
+	variable i.
+
+	2014-06-30  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd.md (vec_perm): Enable for bigendian.
+	* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Remove assert
+	against bigendian and adjust indices.
+
+2014-08-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211779.
+	2014-06-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm_neon.h (vadd_f32): Change #ifdef to __FAST_MATH.
+
+2014-07-30  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211503.
+	2014-06-12  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/arm_neon.h (vmlaq_n_f64, vmlsq_n_f64, vrsrtsq_f64,
+	vcge_p8, vcgeq_p8, vcgez_p8, vcgez_u8, vcgez_u16, vcgez_u32, vcgez_u64,
+	vcgezq_p8, vcgezq_u8, vcgezq_u16, vcgezq_u32, vcgezq_u64, vcgezd_u64,
+	vcgt_p8, vcgtq_p8, vcgtz_p8, vcgtz_u8, vcgtz_u16, vcgtz_u32, vcgtz_u64,
+	vcgtzq_p8, vcgtzq_u8, vcgtzq_u16, vcgtzq_u32, vcgtzq_u64, vcgtzd_u64,
+	vcle_p8, vcleq_p8, vclez_p8, vclez_u64, vclezq_p8, vclezd_u64, vclt_p8,
+	vcltq_p8, vcltz_p8, vcltzq_p8, vcltzd_u64): Remove functions as they are
+	not in the spec.
+
+2014-07-30  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211140.
+	2014-06-02  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* config/aarch64/aarch64.md (set_fpcr): Drop ISB after FPCR write.
+
+2014-07-29  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+	* LINARO-VERSION: Update.
+
+2014-07-20  Yvan Roux  <yvan.roux@linaro.org>
+
+	Revert:
+	2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211129.
+	2014-06-02  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	PR target/61154
+	* config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define.
+	* config/arm/arm.md (mov64 splitter): Replace const_double_operand
+	with immediate_operand.
+
+2014-07-19  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+	* LINARO-VERSION: Update.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211887, r211899.
+	2014-06-23  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.md (addsi3_aarch64): Set "simd" attr to
+	"yes" where needed.
+
+	2014-06-23  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.md (*addsi3_aarch64): Add alternative in
+	vector registers.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211440.
+	2014-06-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config.gcc (aarch64*-*-*): Add arm_acle.h to extra headers.
+	* Makefile.in (TEXI_GCC_FILES): Add aarch64-acle-intrinsics.texi to
+	dependencies.
+	* config/aarch64/aarch64-builtins.c (AARCH64_CRC32_BUILTINS): Define.
+	(aarch64_crc_builtin_datum): New struct.
+	(aarch64_crc_builtin_data): New.
+	(aarch64_init_crc32_builtins): New function.
+	(aarch64_init_builtins): Initialise CRC32 builtins when appropriate.
+	(aarch64_crc32_expand_builtin): New.
+	(aarch64_expand_builtin): Add CRC32 builtin expansion case.
+	* config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define
+	__ARM_FEATURE_CRC32 when appropriate.
+	(TARGET_CRC32): Define.
+	* config/aarch64/aarch64.md (UNSPEC_CRC32B, UNSPEC_CRC32H,
+	UNSPEC_CRC32W, UNSPEC_CRC32X, UNSPEC_CRC32CB, UNSPEC_CRC32CH,
+	UNSPEC_CRC32CW, UNSPEC_CRC32CX): New unspec values.
+	(aarch64_<crc_variant>): New pattern.
+	* config/aarch64/arm_acle.h: New file.
+	* config/aarch64/iterators.md (CRC): New int iterator.
+	(crc_variant, crc_mode): New int attributes.
+	* doc/aarch64-acle-intrinsics.texi: New file.
+	* doc/extend.texi (aarch64): Document aarch64 ACLE intrinsics.
+	Include aarch64-acle-intrinsics.texi.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211174.
+	2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev-op><mode>):
+	New pattern.
+	* config/aarch64/aarch64.c (aarch64_evpc_rev): New function.
+	(aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev.
+	* config/aarch64/iterators.md (REVERSE): New iterator.
+	(UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements.
+	(rev_op): New int_attribute.
+	* config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8,
+	vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8,
+	vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8,
+	vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8,
+	vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16,
+	vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8,
+	vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32):
+	Replace temporary __asm__ with __builtin_shuffle.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210216, r210218, r210219.
+	2014-05-08  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm_neon.h: Update comment.
+	* config/arm/neon-docgen.ml: Delete.
+	* config/arm/neon-gen.ml: Delete.
+	* doc/arm-neon-intrinsics.texi: Update comment.
+
+	2014-05-08  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm_neon_builtins.def (vadd, vsub): Only define the v2sf
+	and v4sf versions.
+	(vand, vorr, veor, vorn, vbic): Remove.
+	* config/arm/neon.md (neon_vadd, neon_vsub, neon_vadd_unspec): Adjust
+	iterator.
+	(neon_vsub_unspec): Likewise.
+	(neon_vorr, neon_vand, neon_vbic, neon_veor, neon_vorn): Remove.
+
+	2014-05-08  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm_neon.h (vadd_s8): GNU C implementation
+	(vadd_s16): Likewise.
+	(vadd_s32): Likewise.
+	(vadd_f32): Likewise.
+	(vadd_u8): Likewise.
+	(vadd_u16): Likewise.
+	(vadd_u32): Likewise.
+	(vadd_s64): Likewise.
+	(vadd_u64): Likewise.
+	(vaddq_s8): Likewise.
+	(vaddq_s16): Likewise.
+	(vaddq_s32): Likewise.
+	(vaddq_s64): Likewise.
+	(vaddq_f32): Likewise.
+	(vaddq_u8): Likewise.
+	(vaddq_u16): Likewise.
+	(vaddq_u32): Likewise.
+	(vaddq_u64): Likewise.
+	(vmul_s8): Likewise.
+	(vmul_s16): Likewise.
+	(vmul_s32): Likewise.
+	(vmul_f32): Likewise.
+	(vmul_u8): Likewise.
+	(vmul_u16): Likewise.
+	(vmul_u32): Likewise.
+	(vmul_p8): Likewise.
+	(vmulq_s8): Likewise.
+	(vmulq_s16): Likewise.
+	(vmulq_s32): Likewise.
+	(vmulq_f32): Likewise.
+	(vmulq_u8): Likewise.
+	(vmulq_u16): Likewise.
+	(vmulq_u32): Likewise.
+	(vsub_s8): Likewise.
+	(vsub_s16): Likewise.
+	(vsub_s32): Likewise.
+	(vsub_f32): Likewise.
+	(vsub_u8): Likewise.
+	(vsub_u16): Likewise.
+	(vsub_u32): Likewise.
+	(vsub_s64): Likewise.
+	(vsub_u64): Likewise.
+	(vsubq_s8): Likewise.
+	(vsubq_s16): Likewise.
+	(vsubq_s32): Likewise.
+	(vsubq_s64): Likewise.
+	(vsubq_f32): Likewise.
+	(vsubq_u8): Likewise.
+	(vsubq_u16): Likewise.
+	(vsubq_u32): Likewise.
+	(vsubq_u64): Likewise.
+	(vand_s8): Likewise.
+	(vand_s16): Likewise.
+	(vand_s32): Likewise.
+	(vand_u8): Likewise.
+	(vand_u16): Likewise.
+	(vand_u32): Likewise.
+	(vand_s64): Likewise.
+	(vand_u64): Likewise.
+	(vandq_s8): Likewise.
+	(vandq_s16): Likewise.
+	(vandq_s32): Likewise.
+	(vandq_s64): Likewise.
+	(vandq_u8): Likewise.
+	(vandq_u16): Likewise.
+	(vandq_u32): Likewise.
+	(vandq_u64): Likewise.
+	(vorr_s8): Likewise.
+	(vorr_s16): Likewise.
+	(vorr_s32): Likewise.
+	(vorr_u8): Likewise.
+	(vorr_u16): Likewise.
+	(vorr_u32): Likewise.
+	(vorr_s64): Likewise.
+	(vorr_u64): Likewise.
+	(vorrq_s8): Likewise.
+	(vorrq_s16): Likewise.
+	(vorrq_s32): Likewise.
+	(vorrq_s64): Likewise.
+	(vorrq_u8): Likewise.
+	(vorrq_u16): Likewise.
+	(vorrq_u32): Likewise.
+	(vorrq_u64): Likewise.
+	(veor_s8): Likewise.
+	(veor_s16): Likewise.
+	(veor_s32): Likewise.
+	(veor_u8): Likewise.
+	(veor_u16): Likewise.
+	(veor_u32): Likewise.
+	(veor_s64): Likewise.
+	(veor_u64): Likewise.
+	(veorq_s8): Likewise.
+	(veorq_s16): Likewise.
+	(veorq_s32): Likewise.
+	(veorq_s64): Likewise.
+	(veorq_u8): Likewise.
+	(veorq_u16): Likewise.
+	(veorq_u32): Likewise.
+	(veorq_u64): Likewise.
+	(vbic_s8): Likewise.
+	(vbic_s16): Likewise.
+	(vbic_s32): Likewise.
+	(vbic_u8): Likewise.
+	(vbic_u16): Likewise.
+	(vbic_u32): Likewise.
+	(vbic_s64): Likewise.
+	(vbic_u64): Likewise.
+	(vbicq_s8): Likewise.
+	(vbicq_s16): Likewise.
+	(vbicq_s32): Likewise.
+	(vbicq_s64): Likewise.
+	(vbicq_u8): Likewise.
+	(vbicq_u16): Likewise.
+	(vbicq_u32): Likewise.
+	(vbicq_u64): Likewise.
+	(vorn_s8): Likewise.
+	(vorn_s16): Likewise.
+	(vorn_s32): Likewise.
+	(vorn_u8): Likewise.
+	(vorn_u16): Likewise.
+	(vorn_u32): Likewise.
+	(vorn_s64): Likewise.
+	(vorn_u64): Likewise.
+	(vornq_s8): Likewise.
+	(vornq_s16): Likewise.
+	(vornq_s32): Likewise.
+	(vornq_s64): Likewise.
+	(vornq_u8): Likewise.
+	(vornq_u16): Likewise.
+	(vornq_u32): Likewise.
+	(vornq_u64): Likewise.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210151.
+	2014-05-07  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/arm_neon.h (vtrn1_f32, vtrn1_p8, vtrn1_p16, vtrn1_s8,
+	vtrn1_s16, vtrn1_s32, vtrn1_u8, vtrn1_u16, vtrn1_u32, vtrn1q_f32,
+	vtrn1q_f64, vtrn1q_p8, vtrn1q_p16, vtrn1q_s8, vtrn1q_s16, vtrn1q_s32,
+	vtrn1q_s64, vtrn1q_u8, vtrn1q_u16, vtrn1q_u32, vtrn1q_u64, vtrn2_f32,
+	vtrn2_p8, vtrn2_p16, vtrn2_s8, vtrn2_s16, vtrn2_s32, vtrn2_u8,
+	vtrn2_u16, vtrn2_u32, vtrn2q_f32, vtrn2q_f64, vtrn2q_p8, vtrn2q_p16,
+	vtrn2q_s8, vtrn2q_s16, vtrn2q_s32, vtrn2q_s64, vtrn2q_u8, vtrn2q_u16,
+	vtrn2q_u32, vtrn2q_u64): Replace temporary asm with __builtin_shuffle.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209794.
+	2014-04-25  Marek Polacek  <polacek@redhat.com>
+
+	PR c/60114
+	* c-parser.c (c_parser_initelt): Pass input_location to
+	process_init_element.
+	(c_parser_initval): Pass loc to process_init_element.
+	* c-tree.h (process_init_element): Adjust declaration.
+	* c-typeck.c (push_init_level): Pass input_location to
+	process_init_element.
+	(pop_init_level): Likewise.
+	(set_designator): Likewise.
+	(output_init_element): Add location_t parameter.  Pass loc to
+	digest_init.
+	(output_pending_init_elements): Pass input_location to
+	output_init_element.
+	(process_init_element): Add location_t parameter.  Pass loc to
+	output_init_element.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211771.
+	2014-06-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* genattrtab.c (n_bypassed): New variable.
+	(process_bypasses): Initialise n_bypassed.
+	Count number of bypassed reservations.
+	(make_automaton_attrs): Allocate space for bypassed reservations
+	rather than number of bypasses.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210861.
+	2014-05-23  Jiong Wang   <jiong.wang@arm.com>
+
+	* config/aarch64/predicates.md (aarch64_call_insn_operand): New
+	predicate.
+	* config/aarch64/constraints.md ("Ucs", "Usf"):  New constraints.
+	* config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn):
+	Adjust for tailcalling through registers.
+	* config/aarch64/aarch64.h (enum reg_class): New caller save
+	register class.
+	(REG_CLASS_NAMES): Likewise.
+	(REG_CLASS_CONTENTS): Likewise.
+	* config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall):
+	Allow tailcalling without decls.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211314.
+	2014-06-06  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New.
+	* config/aarch64/aarch64.c (aarch64_move_pointer): New.
+	(aarch64_progress_pointer): Likewise.
+	(aarch64_copy_one_part_and_move_pointers): Likewise.
+	(aarch64_expand_movmen): Likewise.
+	* config/aarch64/aarch64.h (MOVE_RATIO): Set low.
+	* config/aarch64/aarch64.md (movmem<mode>): New.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211185, 211186.
+	2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc/config/aarch64/aarch64-builtins.c
+	(aarch64_types_binop_uus_qualifiers,
+	aarch64_types_shift_to_unsigned_qualifiers,
+	aarch64_types_unsigned_shiftacc_qualifiers): Define.
+	* gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd,
+	uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n,
+	sqshlu_n, uqshl_n): Update qualifiers.
+	* gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32,
+	vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8,
+	vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32,
+	vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8,
+	vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16,
+	vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32,
+	vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64,
+	vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16,
+	vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64,
+	vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16,
+	vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32,
+	vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64,
+	vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8,
+	vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8,
+	vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16,
+	vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32,
+	vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64,
+	vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8,
+	vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8,
+	vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16,
+	vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16,
+	vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32,
+	vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64,
+	vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8,
+	vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8,
+	vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16,
+	vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts.
+
+	2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc/config/aarch64/aarch64-builtins.c
+	(aarch64_types_binop_ssu_qualifiers): New static data.
+	(TYPES_BINOP_SSU): Define.
+	* gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl,
+	urshr_n, ushll_n): Use appropriate unsigned qualifiers.	47
+	* gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32,
+	vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64,
+	vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8,	50
+	vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8,
+	vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32,	52
+	vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64,	53
+	vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness
+	suffix to builtin function name, remove cast.	55
+	(vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32,
+	vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8,	57
+	vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211408, 211416.
+	2014-06-10  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix
+	REG_CFA_RESTORE mode.
+
+	2014-06-10  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_save_or_restore_fprs)
+	(aarch64_save_or_restore_callee_save_registers): Fix layout.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211418.
+	2014-06-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64-simd.md (move_lo_quad_<mode>):
+	Change second alternative type to f_mcr.
+	* config/aarch64/aarch64.md (*movsi_aarch64): Change 11th
+	and 12th alternatives' types to f_mcr and f_mrc.
+	(*movdi_aarch64): Same for 12th and 13th alternatives.
+	(*movsf_aarch64): Change 9th alternatives' type to mov_reg.
+	(aarch64_movtilow_tilow): Change type to fmov.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211371.
+	2014-06-09  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm-modes.def: Remove XFmode.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211268.
+	2014-06-05  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack
+	layout comment.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211129.
+	2014-06-02  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	PR target/61154
+	* config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define.
+	* config/arm/arm.md (mov64 splitter): Replace const_double_operand
+	with immediate_operand.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211073.
+	2014-05-30  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw
+	to mov_imm.
+	* config/arm/vfp.md (*thumb2_movsi_vfp): Likewise.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211050.
+	2014-05-29  Richard Earnshaw <rearnsha@arm.com>
+	Richard Sandiford  <rdsandiford@googlemail.com>
+
+	* arm/iterators.md (shiftable_ops): New code iterator.
+	(t2_binop0, arith_shift_insn): New code attributes.
+	* arm/predicates.md (shift_nomul_operator): New predicate.
+	* arm/arm.md (insn_enabled): Delete.
+	(enabled): Remove insn_enabled test.
+	(*arith_shiftsi): Delete.  Replace with ...
+	(*<arith_shift_insn>_multsi): ... new pattern.
+	(*<arith_shift_insn>_shiftsi): ... new pattern.
+	* config/arm/arm.c (arm_print_operand): Handle operand format 'b'.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210996.
+	2014-05-27  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64.md (stack_protect_set_<mode>):
+	Use <w> for the register in assembly template.
+	(stack_protect_test): Use the mode of operands[0] for the
+	result.
+	(stack_protect_test_<mode>): Use <w> for the register
+	in assembly template.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210967.
+	2014-05-27  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/neon.md (neon_bswap<mode>): New pattern.
+	* config/arm/arm.c (neon_itype): Add NEON_BSWAP.
+	(arm_init_neon_builtins): Handle NEON_BSWAP.
+	Define required type nodes.
+	(arm_expand_neon_builtin): Handle NEON_BSWAP.
+	(arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
+	* config/arm/arm_neon_builtins.def (bswap): Define builtins.
+	* config/arm/iterators.md (VDQHSD): New mode iterator.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210471.
+	2014-05-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL
+	enum name for PARAM_SCHED_PRESSURE_ALGORITHM.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210369.
+	2014-05-13  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR.
+	(arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR.
+	Remove associated type declarations and initialisations.
+	(arm_expand_neon_builtin): Likewise.
+	(neon_emit_pair_result_insn): Delete.
+	* config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete.
+	* config/arm/neon.md (neon_vtrn<mode>): Delete.
+	(neon_vzip<mode>): Likewise.
+	(neon_vuzp<mode>): Likewise.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211058, 211177.
+	2014-05-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
+	TYPES_BINOPV): New static data.
+	* config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
+	* config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
+	New patterns.
+	* config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
+	patterns for EXT.
+	(aarch64_evpc_ext): New function.
+
+	* config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
+
+	* config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
+	vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
+	vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
+	vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
+	vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
+
+	2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle
+	location == 0.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209797.
+	2014-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p):
+	Use HOST_WIDE_INT_C for mask literal.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211148.
+	2014-06-02  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER):
+	/lib/ld-linux32-aarch64.so.1 is used for ILP32.
+	(LINUX_TARGET_LINK_SPEC): Update linker script for ILP32.
+	file whose name depends on -mabi= and -mbig-endian.
+	* config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64
+	better and handle ilp32 too.
+	(MULTILIB_OPTIONS): Delete.
+	(MULTILIB_DIRNAMES): Delete.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210828, r211103.
+	2014-05-31  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define.
+	(arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR.
+	(bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr
+	and __builtins_arm_get_fpscr.
+	(arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and
+	__builtins_arm_get_fpscr.
+	(arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and
+	__builtins_arm_ldfpscr.
+	(arm_atomic_assign_expand_fenv): New function.
+	* config/arm/vfp.md (set_fpscr): New pattern.
+	(get_fpscr) : Likewise.
+	* config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and
+	VUNSPEC_SET_FPSCR.
+	* doc/extend.texi (AARCH64 Built-in Functions) : Document
+	__builtins_arm_set_fpscr, __builtins_arm_get_fpscr.
+
+	2014-05-23  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
+	define.
+	* config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
+	New function declaration.
+	* config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
+	AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
+	AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
+	(aarch64_init_builtins) : Initialize builtins
+	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
+	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
+	(aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
+	__builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
+	and __builtins_aarch64_set_fpsr.
+	(aarch64_atomic_assign_expand_fenv): New function.
+	* config/aarch64/aarch64.md (set_fpcr): New pattern.
+	(get_fpcr) : Likewise.
+	(set_fpsr) : Likewise.
+	(get_fpsr) : Likewise.
+	(unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
+	 and UNSPECV_SET_FPSR.
+	* doc/extend.texi (AARCH64 Built-in Functions) : Document
+	__builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
+	__builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210355.
+	2014-05-13  Ian Bolton  <ian.bolton@arm.com>
+
+	* config/aarch64/aarch64-protos.h
+	(aarch64_hard_regno_caller_save_mode): New prototype.
+	* config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode):
+	New function.
+	* config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209943.
+	2014-04-30  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8,
+	vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32,
+	vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32,
+	vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32,
+	vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8,
+	vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16,
+	vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16,
+	vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle.
+
+2014-06-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+	* LINARO-VERSION: Update.
+
+2014-06-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	Revert:
+	2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209643.
+	2014-04-22  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define.
+
+2014-06-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498,
+	210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507,
+	210508, 210509, 210510, 210512, 211205, 211206.
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New.
+	(cpu_addrcost_table): Use it.
+	* config/aarch64/aarch64.c (generic_addrcost_table): Initialize it.
+	(aarch64_address_cost): Rewrite using aarch64_classify_address,
+	move it.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (cortexa57_addrcost_table): New.
+	(cortexa57_vector_cost): Likewise.
+	(cortexa57_tunings): Use them.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New.
+	(TARGET_RTX_COSTS): Call it.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_build_constant): Conditionally
+	emit instructions, return number of instructions which would
+	be emitted.
+	(aarch64_add_constant): Update call to aarch64_build_constant.
+	(aarch64_output_mi_thunk): Likewise.
+	(aarch64_rtx_costs): Estimate cost of a CONST_INT, cost
+	a CONST_DOUBLE.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename
+	to...
+	(aarch64_strip_extend): ...this, don't strip shifts, check RTX is
+	well formed.
+	(aarch64_rtx_mult_cost): New.
+	(aarch64_rtx_costs): Use it, refactor as appropriate.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philip Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing
+	for SET RTX.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Use address
+	costs when costing loads and stores to memory.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for
+	logical operations.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Cost
+	ZERO_EXTEND and SIGN_EXTEND better.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for
+	rotates and shifts.
+
+	2014-03-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New.
+	(aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for
+	DIV/MOD.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison
+	operators.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA,
+	FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF,
+	HIGH, LO_SUM.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case
+	where we were unable to cost an RTX.
+
+	2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case.
+
+	2014-06-03  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function.
+	(aarch64_rtx_costs): Use aarch64_if_then_else_costs.
+
+	2014-06-03  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non
+	comparisons for OP0.
+
+2014-06-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+	* LINARO-VERSION: Update.
+
+2014-06-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211211.
+	2014-06-04  Bin Cheng  <bin.cheng@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_classify_address)
+	(aarch64_legitimize_reload_address): Support full addressing modes
+	for vector modes.
+	* config/aarch64/aarch64.md (mov<mode>, movmisalign<mode>)
+	(*aarch64_simd_mov<mode>, *aarch64_simd_mov<mode>): Relax predicates.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209906.
+	2014-04-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8,
+	vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32,
+	vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32,
+	vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32,
+	vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8,
+	vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16,
+	vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16,
+	vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209897.
+	2014-04-29  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* calls.c (initialize_argument_information): Always treat
+	PUSH_ARGS_REVERSED as 1, simplify code accordingly.
+	(expand_call): Likewise.
+	(emit_library_call_calue_1): Likewise.
+	* expr.c (PUSH_ARGS_REVERSED): Do not define.
+	(emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify
+	code accordingly.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209880.
+	2014-04-28  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_types_storestruct_lane_qualifiers): New.
+	(TYPES_STORESTRUCT_LANE): Likewise.
+	* config/aarch64/aarch64-simd-builtins.def (st2_lane): New.
+	(st3_lane): Likewise.
+	(st4_lane): Likewise.
+	* config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane<mode>): New.
+	(vec_store_lanesci_lane<mode>): Likewise.
+	(vec_store_lanesxi_lane<mode>): Likewise.
+		(aarch64_st2_lane<VQ:mode>): Likewise.
+	(aarch64_st3_lane<VQ:mode>): Likewise.
+	(aarch64_st4_lane<VQ:mode>): Likewise.
+	* config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE.
+	* config/aarch64/arm_neon.h
+		(__ST2_LANE_FUNC): Rewrite using builtins, update use points to
+	use new macro arguments.
+	(__ST3_LANE_FUNC): Likewise.
+	(__ST4_LANE_FUNC): Likewise.
+	* config/aarch64/iterators.md (V_TWO_ELEM): New.
+	(V_THREE_ELEM): Likewise.
+	(V_FOUR_ELEM): Likewise.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209878.
+	2014-04-28  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-protos.h (aarch64_modes_tieable_p): New.
+	* config/aarch64/aarch64.c
+	(aarch64_cannot_change_mode_class): Weaken conditions.
+	(aarch64_modes_tieable_p): New.
+	* config/aarch64/aarch64.h (MODES_TIEABLE_P): Use it.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209808.
+	2014-04-25  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/arm/predicates.md (call_insn_operand): Add long_call check.
+	* config/arm/arm.md (sibcall, sibcall_value): Force the address to
+	reg for long_call.
+	* config/arm/arm.c (arm_function_ok_for_sibcall): Remove long_call
+	restriction.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209806.
+	2014-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (arm_cortex_a8_tune): Initialise
+	T16-related fields.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209742, 209749.
+	2014-04-24  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_evpc_tbl): Enable for bigendian.
+
+	2014-04-24  Tejas Belagod  <tejas.belagod@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_evpc_tbl): Reverse order of elements
+	for big-endian.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209736.
+	2014-04-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
+	BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
+	* config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
+	* config/aarch64/aarch64-simd-builtins.def: Define vector bswap
+	builtins.
+	* config/aarch64/iterator.md (VDQHSD): New mode iterator.
+	(Vrevsuff): New mode attribute.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209712.
+	2014-04-23 Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
+
+	* config/aarch64/aarch64.md (stack_protect_set, stack_protect_test)
+	(stack_protect_set_<mode>, stack_protect_test_<mode>): Add
+	machine descriptions for Stack Smashing Protector.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209711.
+	2014-04-23  Richard Earnshaw  <rearnsha@arm.com>
+
+	* aarch64.md (<optab>_rol<mode>3): New pattern.
+	(<optab>_rolsi3_uxtw): Likewise.
+	* aarch64.c (aarch64_strip_shift): Handle ROTATE and ROTATERT.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209710.
+	2014-04-23  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/arm/arm.c (arm_cortex_a57_tune): Initialize all fields.
+	(arm_cortex_a12_tune): Likewise.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209706.
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle BSWAP.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209701, 209702, 209703, 209704, 209705.
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.md (arm_rev16si2): New pattern.
+	(arm_rev16si2_alt): Likewise.
+	* config/arm/arm.c (arm_new_rtx_costs): Handle rev16 case.
+
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+	* config/aarch64/aarch64.md (rev16<mode>2): New pattern.
+	(rev16<mode>2_alt): Likewise.
+	* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
+	* config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+	(aarch_rev16_p_1): Likewise.
+	(aarch_rev16_p): Likewise.
+	* config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
+	(aarch_rev16_shright_mask_imm_p): Likewise.
+	(aarch_rev16_shleft_mask_imm_p): Likewise.
+
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.
+	* config/arm/aarch-cost-tables.h (generic_extra_costs): Specify
+	rev cost.
+	(cortex_a53_extra_costs): Likewise.
+	(cortex_a57_extra_costs): Likewise.
+	* config/arm/arm.c (cortexa9_extra_costs): Likewise.
+	(cortexa7_extra_costs): Likewise.
+	(cortexa8_extra_costs): Likewise.
+	(cortexa12_extra_costs): Likewise.
+	(cortexa15_extra_costs): Likewise.
+	(v7m_extra_costs): Likewise.
+	(arm_new_rtx_costs): Handle BSWAP.
+
+	2013-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (cortexa8_extra_costs): New table.
+	(arm_cortex_a8_tune): New tuning struct.
+	* config/arm/arm-cores.def (cortex-a8): Use cortex_a8 tuning struct.
+
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm.c (arm_new_rtx_costs): Handle FMA.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209659.
+	2014-04-22  Richard Henderson  <rth@redhat.com>
+
+	* config/aarch64/aarch64 (addti3, subti3): New expanders.
+	(add<GPI>3_compare0): Remove leading * from name.
+	(add<GPI>3_carryin): Likewise.
+	(sub<GPI>3_compare0): Likewise.
+	(sub<GPI>3_carryin): Likewise.
+	(<su_optab>mulditi3): New expander.
+	(multi3): New expander.
+	(madd<GPI>): Remove leading * from name.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209645.
+	2014-04-22  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
+	Handle TLS for ILP32.
+	* config/aarch64/aarch64.md (tlsie_small): Rename to ...
+	(tlsie_small_<mode>): this and handle PTR.
+	(tlsie_small_sidi): New pattern.
+	(tlsle_small): Change to an expand to handle ILP32.
+	(tlsle_small_<mode>): New pattern.
+	(tlsdesc_small): Rename to ...
+	(tlsdesc_small_<mode>): this and handle PTR.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209643.
+	2014-04-22  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209641, 209642.
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed.
+	(aarch64_types_signed_unsigned_qualifiers): Qualifier added.
+	(aarch64_types_signed_poly_qualifiers): Likewise.
+	(aarch64_types_unsigned_signed_qualifiers): Likewise.
+	(aarch64_types_poly_signed_qualifiers): Likewise.
+	(TYPES_REINTERP_SS): Type macro added.
+	(TYPES_REINTERP_SU): Likewise.
+	(TYPES_REINTERP_SP): Likewise.
+	(TYPES_REINTERP_US): Likewise.
+	(TYPES_REINTERP_PS): Likewise.
+	(aarch64_fold_builtin): New expression folding added.
+	* config/aarch64/aarch64-simd-builtins.def (REINTERP):
+	Declarations removed.
+	(REINTERP_SS): Declarations added.
+	(REINTERP_US): Likewise.
+	(REINTERP_PS): Likewise.
+	(REINTERP_SU): Likewise.
+	(REINTERP_SP): Likewise.
+	* config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented.
+	(vreinterpretq_p8_f64): Likewise.
+	(vreinterpret_p16_f64): Likewise.
+	(vreinterpretq_p16_f64): Likewise.
+	(vreinterpret_f32_f64): Likewise.
+	(vreinterpretq_f32_f64): Likewise.
+	(vreinterpret_f64_f32): Likewise.
+	(vreinterpret_f64_p8): Likewise.
+	(vreinterpret_f64_p16): Likewise.
+	(vreinterpret_f64_s8): Likewise.
+	(vreinterpret_f64_s16): Likewise.
+	(vreinterpret_f64_s32): Likewise.
+	(vreinterpret_f64_s64): Likewise.
+	(vreinterpret_f64_u8): Likewise.
+	(vreinterpret_f64_u16): Likewise.
+	(vreinterpret_f64_u32): Likewise.
+	(vreinterpret_f64_u64): Likewise.
+	(vreinterpretq_f64_f32): Likewise.
+	(vreinterpretq_f64_p8): Likewise.
+	(vreinterpretq_f64_p16): Likewise.
+	(vreinterpretq_f64_s8): Likewise.
+	(vreinterpretq_f64_s16): Likewise.
+	(vreinterpretq_f64_s32): Likewise.
+	(vreinterpretq_f64_s64): Likewise.
+	(vreinterpretq_f64_u8): Likewise.
+	(vreinterpretq_f64_u16): Likewise.
+	(vreinterpretq_f64_u32): Likewise.
+	(vreinterpretq_f64_u64): Likewise.
+	(vreinterpret_s64_f64): Likewise.
+	(vreinterpretq_s64_f64): Likewise.
+	(vreinterpret_u64_f64): Likewise.
+	(vreinterpretq_u64_f64): Likewise.
+	(vreinterpret_s8_f64): Likewise.
+	(vreinterpretq_s8_f64): Likewise.
+	(vreinterpret_s16_f64): Likewise.
+	(vreinterpretq_s16_f64): Likewise.
+	(vreinterpret_s32_f64): Likewise.
+	(vreinterpretq_s32_f64): Likewise.
+	(vreinterpret_u8_f64): Likewise.
+	(vreinterpretq_u8_f64): Likewise.
+	(vreinterpret_u16_f64): Likewise.
+	(vreinterpretq_u16_f64): Likewise.
+	(vreinterpret_u32_f64): Likewise.
+	(vreinterpretq_u32_f64): Likewise.
+
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* config/aarch64/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed.
+	* config/aarch64/aarch64/aarch64-simd-builtins.def (REINTERP): Removed.
+	(vreinterpret_p8_s8): Likewise.
+	* config/aarch64/aarch64/arm_neon.h (vreinterpret_p8_s8): Uses cast.
+	(vreinterpret_p8_s16): Likewise.
+	(vreinterpret_p8_s32): Likewise.
+	(vreinterpret_p8_s64): Likewise.
+	(vreinterpret_p8_f32): Likewise.
+	(vreinterpret_p8_u8): Likewise.
+	(vreinterpret_p8_u16): Likewise.
+	(vreinterpret_p8_u32): Likewise.
+	(vreinterpret_p8_u64): Likewise.
+	(vreinterpret_p8_p16): Likewise.
+	(vreinterpretq_p8_s8): Likewise.
+	(vreinterpretq_p8_s16): Likewise.
+	(vreinterpretq_p8_s32): Likewise.
+	(vreinterpretq_p8_s64): Likewise.
+	(vreinterpretq_p8_f32): Likewise.
+	(vreinterpretq_p8_u8): Likewise.
+	(vreinterpretq_p8_u16): Likewise.
+	(vreinterpretq_p8_u32): Likewise.
+	(vreinterpretq_p8_u64): Likewise.
+	(vreinterpretq_p8_p16): Likewise.
+	(vreinterpret_p16_s8): Likewise.
+	(vreinterpret_p16_s16): Likewise.
+	(vreinterpret_p16_s32): Likewise.
+	(vreinterpret_p16_s64): Likewise.
+	(vreinterpret_p16_f32): Likewise.
+	(vreinterpret_p16_u8): Likewise.
+	(vreinterpret_p16_u16): Likewise.
+	(vreinterpret_p16_u32): Likewise.
+	(vreinterpret_p16_u64): Likewise.
+	(vreinterpret_p16_p8): Likewise.
+	(vreinterpretq_p16_s8): Likewise.
+	(vreinterpretq_p16_s16): Likewise.
+	(vreinterpretq_p16_s32): Likewise.
+	(vreinterpretq_p16_s64): Likewise.
+	(vreinterpretq_p16_f32): Likewise.
+	(vreinterpretq_p16_u8): Likewise.
+	(vreinterpretq_p16_u16): Likewise.
+	(vreinterpretq_p16_u32): Likewise.
+	(vreinterpretq_p16_u64): Likewise.
+	(vreinterpretq_p16_p8): Likewise.
+	(vreinterpret_f32_s8): Likewise.
+	(vreinterpret_f32_s16): Likewise.
+	(vreinterpret_f32_s32): Likewise.
+	(vreinterpret_f32_s64): Likewise.
+	(vreinterpret_f32_u8): Likewise.
+	(vreinterpret_f32_u16): Likewise.
+	(vreinterpret_f32_u32): Likewise.
+	(vreinterpret_f32_u64): Likewise.
+	(vreinterpret_f32_p8): Likewise.
+	(vreinterpret_f32_p16): Likewise.
+	(vreinterpretq_f32_s8): Likewise.
+	(vreinterpretq_f32_s16): Likewise.
+	(vreinterpretq_f32_s32): Likewise.
+	(vreinterpretq_f32_s64): Likewise.
+	(vreinterpretq_f32_u8): Likewise.
+	(vreinterpretq_f32_u16): Likewise.
+	(vreinterpretq_f32_u32): Likewise.
+	(vreinterpretq_f32_u64): Likewise.
+	(vreinterpretq_f32_p8): Likewise.
+	(vreinterpretq_f32_p16): Likewise.
+	(vreinterpret_s64_s8): Likewise.
+	(vreinterpret_s64_s16): Likewise.
+	(vreinterpret_s64_s32): Likewise.
+	(vreinterpret_s64_f32): Likewise.
+	(vreinterpret_s64_u8): Likewise.
+	(vreinterpret_s64_u16): Likewise.
+	(vreinterpret_s64_u32): Likewise.
+	(vreinterpret_s64_u64): Likewise.
+	(vreinterpret_s64_p8): Likewise.
+	(vreinterpret_s64_p16): Likewise.
+	(vreinterpretq_s64_s8): Likewise.
+	(vreinterpretq_s64_s16): Likewise.
+	(vreinterpretq_s64_s32): Likewise.
+	(vreinterpretq_s64_f32): Likewise.
+	(vreinterpretq_s64_u8): Likewise.
+	(vreinterpretq_s64_u16): Likewise.
+	(vreinterpretq_s64_u32): Likewise.
+	(vreinterpretq_s64_u64): Likewise.
+	(vreinterpretq_s64_p8): Likewise.
+	(vreinterpretq_s64_p16): Likewise.
+	(vreinterpret_u64_s8): Likewise.
+	(vreinterpret_u64_s16): Likewise.
+	(vreinterpret_u64_s32): Likewise.
+	(vreinterpret_u64_s64): Likewise.
+	(vreinterpret_u64_f32): Likewise.
+	(vreinterpret_u64_u8): Likewise.
+	(vreinterpret_u64_u16): Likewise.
+	(vreinterpret_u64_u32): Likewise.
+	(vreinterpret_u64_p8): Likewise.
+	(vreinterpret_u64_p16): Likewise.
+	(vreinterpretq_u64_s8): Likewise.
+	(vreinterpretq_u64_s16): Likewise.
+	(vreinterpretq_u64_s32): Likewise.
+	(vreinterpretq_u64_s64): Likewise.
+	(vreinterpretq_u64_f32): Likewise.
+	(vreinterpretq_u64_u8): Likewise.
+	(vreinterpretq_u64_u16): Likewise.
+	(vreinterpretq_u64_u32): Likewise.
+	(vreinterpretq_u64_p8): Likewise.
+	(vreinterpretq_u64_p16): Likewise.
+	(vreinterpret_s8_s16): Likewise.
+	(vreinterpret_s8_s32): Likewise.
+	(vreinterpret_s8_s64): Likewise.
+	(vreinterpret_s8_f32): Likewise.
+	(vreinterpret_s8_u8): Likewise.
+	(vreinterpret_s8_u16): Likewise.
+	(vreinterpret_s8_u32): Likewise.
+	(vreinterpret_s8_u64): Likewise.
+	(vreinterpret_s8_p8): Likewise.
+	(vreinterpret_s8_p16): Likewise.
+	(vreinterpretq_s8_s16): Likewise.
+	(vreinterpretq_s8_s32): Likewise.
+	(vreinterpretq_s8_s64): Likewise.
+	(vreinterpretq_s8_f32): Likewise.
+	(vreinterpretq_s8_u8): Likewise.
+	(vreinterpretq_s8_u16): Likewise.
+	(vreinterpretq_s8_u32): Likewise.
+	(vreinterpretq_s8_u64): Likewise.
+	(vreinterpretq_s8_p8): Likewise.
+	(vreinterpretq_s8_p16): Likewise.
+	(vreinterpret_s16_s8): Likewise.
+	(vreinterpret_s16_s32): Likewise.
+	(vreinterpret_s16_s64): Likewise.
+	(vreinterpret_s16_f32): Likewise.
+	(vreinterpret_s16_u8): Likewise.
+	(vreinterpret_s16_u16): Likewise.
+	(vreinterpret_s16_u32): Likewise.
+	(vreinterpret_s16_u64): Likewise.
+	(vreinterpret_s16_p8): Likewise.
+	(vreinterpret_s16_p16): Likewise.
+	(vreinterpretq_s16_s8): Likewise.
+	(vreinterpretq_s16_s32): Likewise.
+	(vreinterpretq_s16_s64): Likewise.
+	(vreinterpretq_s16_f32): Likewise.
+	(vreinterpretq_s16_u8): Likewise.
+	(vreinterpretq_s16_u16): Likewise.
+	(vreinterpretq_s16_u32): Likewise.
+	(vreinterpretq_s16_u64): Likewise.
+	(vreinterpretq_s16_p8): Likewise.
+	(vreinterpretq_s16_p16): Likewise.
+	(vreinterpret_s32_s8): Likewise.
+	(vreinterpret_s32_s16): Likewise.
+	(vreinterpret_s32_s64): Likewise.
+	(vreinterpret_s32_f32): Likewise.
+	(vreinterpret_s32_u8): Likewise.
+	(vreinterpret_s32_u16): Likewise.
+	(vreinterpret_s32_u32): Likewise.
+	(vreinterpret_s32_u64): Likewise.
+	(vreinterpret_s32_p8): Likewise.
+	(vreinterpret_s32_p16): Likewise.
+	(vreinterpretq_s32_s8): Likewise.
+	(vreinterpretq_s32_s16): Likewise.
+	(vreinterpretq_s32_s64): Likewise.
+	(vreinterpretq_s32_f32): Likewise.
+	(vreinterpretq_s32_u8): Likewise.
+	(vreinterpretq_s32_u16): Likewise.
+	(vreinterpretq_s32_u32): Likewise.
+	(vreinterpretq_s32_u64): Likewise.
+	(vreinterpretq_s32_p8): Likewise.
+	(vreinterpretq_s32_p16): Likewise.
+	(vreinterpret_u8_s8): Likewise.
+	(vreinterpret_u8_s16): Likewise.
+	(vreinterpret_u8_s32): Likewise.
+	(vreinterpret_u8_s64): Likewise.
+	(vreinterpret_u8_f32): Likewise.
+	(vreinterpret_u8_u16): Likewise.
+	(vreinterpret_u8_u32): Likewise.
+	(vreinterpret_u8_u64): Likewise.
+	(vreinterpret_u8_p8): Likewise.
+	(vreinterpret_u8_p16): Likewise.
+	(vreinterpretq_u8_s8): Likewise.
+	(vreinterpretq_u8_s16): Likewise.
+	(vreinterpretq_u8_s32): Likewise.
+	(vreinterpretq_u8_s64): Likewise.
+	(vreinterpretq_u8_f32): Likewise.
+	(vreinterpretq_u8_u16): Likewise.
+	(vreinterpretq_u8_u32): Likewise.
+	(vreinterpretq_u8_u64): Likewise.
+	(vreinterpretq_u8_p8): Likewise.
+	(vreinterpretq_u8_p16): Likewise.
+	(vreinterpret_u16_s8): Likewise.
+	(vreinterpret_u16_s16): Likewise.
+	(vreinterpret_u16_s32): Likewise.
+	(vreinterpret_u16_s64): Likewise.
+	(vreinterpret_u16_f32): Likewise.
+	(vreinterpret_u16_u8): Likewise.
+	(vreinterpret_u16_u32): Likewise.
+	(vreinterpret_u16_u64): Likewise.
+	(vreinterpret_u16_p8): Likewise.
+	(vreinterpret_u16_p16): Likewise.
+	(vreinterpretq_u16_s8): Likewise.
+	(vreinterpretq_u16_s16): Likewise.
+	(vreinterpretq_u16_s32): Likewise.
+	(vreinterpretq_u16_s64): Likewise.
+	(vreinterpretq_u16_f32): Likewise.
+	(vreinterpretq_u16_u8): Likewise.
+	(vreinterpretq_u16_u32): Likewise.
+	(vreinterpretq_u16_u64): Likewise.
+	(vreinterpretq_u16_p8): Likewise.
+	(vreinterpretq_u16_p16): Likewise.
+	(vreinterpret_u32_s8): Likewise.
+	(vreinterpret_u32_s16): Likewise.
+	(vreinterpret_u32_s32): Likewise.
+	(vreinterpret_u32_s64): Likewise.
+	(vreinterpret_u32_f32): Likewise.
+	(vreinterpret_u32_u8): Likewise.
+	(vreinterpret_u32_u16): Likewise.
+	(vreinterpret_u32_u64): Likewise.
+	(vreinterpret_u32_p8): Likewise.
+	(vreinterpret_u32_p16): Likewise.
+	(vreinterpretq_u32_s8): Likewise.
+	(vreinterpretq_u32_s16): Likewise.
+	(vreinterpretq_u32_s32): Likewise.
+	(vreinterpretq_u32_s64): Likewise.
+	(vreinterpretq_u32_f32): Likewise.
+	(vreinterpretq_u32_u8): Likewise.
+	(vreinterpretq_u32_u16): Likewise.
+	(vreinterpretq_u32_u64): Likewise.
+	(vreinterpretq_u32_p8): Likewise.
+	(vreinterpretq_u32_p16): Likewise.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209640.
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc/config/aarch64/aarch64-simd.md (aarch64_s<optab><mode>):
+	Pattern extended.
+	* config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator
+	extended.
+	(sqabs): Likewise.
+	* config/aarch64/arm_neon.h (vqneg_s64): New intrinsic.
+	(vqnegd_s64): Likewise.
+	(vqabs_s64): Likewise.
+	(vqabsd_s64): Likewise.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209627, 209636.
+	2014-04-22  Renlin  <renlin.li@arm.com>
+		    Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.h (aarch64_frame): Delete "fp_lr_offset".
+	* config/aarch64/aarch64.c (aarch64_layout_frame)
+	(aarch64_initial_elimination_offset): Likewise.
+
+	2014-04-22  Marcus Shawcroft  <marcus.shawcroft@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_initial_elimination_offset):
+	Fix indentation.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209618.
+	2014-04-22  Renlin Li  <Renlin.Li@arm.com>
+
+	* config/aarch64/aarch64.c (aarch64_print_operand_address): Adjust
+	the output asm format.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209617.
+	2014-04-22  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* config/aarch64/aarch64-simd.md
+	(aarch64_cm<optab>di): Always split.
+	(*aarch64_cm<optab>di): New.
+	(aarch64_cmtstdi): Always split.
+	(*aarch64_cmtstdi): New.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209615.
+	2014-04-22  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/arm.c (arm_hard_regno_mode_ok): Loosen
+	restrictions on core registers for DImode values in Thumb2.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209613, r209614.
+	2014-04-22  Ian Bolton  <ian.bolton@arm.com>
+
+	* config/arm/arm.md (*anddi_notdi_zesidi): New pattern.
+	* config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern.
+
+	2014-04-22  Ian Bolton  <ian.bolton@arm.com>
+
+	* config/arm/thumb2.md (*iordi_notdi_di): New pattern.
+	(*iordi_notzesidi_di): Likewise.
+	(*iordi_notsesidi_di): Likewise.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209561.
+	2014-04-22  Ian Bolton  <ian.bolton@arm.com>
+
+	* config/arm/arm-protos.h (tune_params): New struct members.
+	* config/arm/arm.c: Initialise tune_params per processor.
+	(thumb2_reorg): Suppress conversion from t32 to t16 when optimizing
+	for speed, based on new tune_params.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209559.
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
+	added.
+	* config/aarch64/aarch64-simd-builtins.def (frintn): Use added
+	macro.
+	* config/aarch64/aarch64-simd.md (<frint_pattern>): Comment
+	corrected.
+	* config/aarch64/aarch64.md (<frint_pattern>): Likewise.
+	* config/aarch64/arm_neon.h (vrnd_f64): Added.
+	(vrnda_f64): Likewise.
+	(vrndi_f64): Likewise.
+	(vrndm_f64): Likewise.
+	(vrndn_f64): Likewise.
+	(vrndp_f64): Likewise.
+	(vrndx_f64): Likewise.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209419.
+	2014-04-15  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR rtl-optimization/60663
+	* config/arm/arm.c (arm_new_rtx_costs): Improve ASM_OPERANDS case,
+	avoid 0 cost.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209457.
+	2014-04-16  Andrew  Pinski  <apinski@cavium.com>
+
+	* config/host-linux.c (TRY_EMPTY_VM_SPACE): Change aarch64 ilp32
+	definition.
+
+2014-05-19  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+	* LINARO-VERSION: Update.
+
+2014-05-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209889.
+	2014-04-29  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
+
+	* config/aarch64/aarch64.md (mov<mode>cc): New for GPF.
+
+2014-05-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209556.
+	2014-04-22  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
+
+	* config/arm/arm.c (arm_print_operand, thumb_exit): Make sure
+	GET_MODE_SIZE argument is enum machine_mode.
+
+2014-04-28  Yvan Roux  <yvan.roux@linaro.org>
+
+	* LINARO-VERSION: Bump version.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
+	* LINARO-VERSION: New file.
+	* configure.ac: Add Linaro version string.
--- a/src/gcc/testsuite/gcc.target/arm/pr44788.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr44788.c
@@ -2,6 +2,8 @@
 /* { dg-require-effective-target arm_thumb2_ok } */
 /* { dg-options "-Os -fno-strict-aliasing -fPIC -mthumb -march=armv7-a -mfpu=vfp3 -mfloat-abi=softfp" } */
 
+extern void foo (float *);
+
 void joint_decode(float* mlt_buffer1, int t) {
     int i;
     float decode_buffer[1060];
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
@@ -5,8 +5,11 @@
 
 #define N 32
 
+float __attribute__((aligned(16))) input[N];
+float __attribute__((aligned(16))) output[N];
+
 void
-foo (float *output, float *input)
+foo ()
 {
   int i = 0;
   /* Vectorizable.  */
--- a/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
+/* { dg-add-options arm_v8_neon } */
+
+#define N 32
+
+float __attribute__((aligned(16))) input[N];
+int __attribute__((aligned(16))) output[N];
+
+void
+foo ()
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = __builtin_lceilf (input[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQs64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int64x2_t out_int64x2_t;
+int64x2_t arg0_int64x2_t;
+int64x2_t arg1_int64x2_t;
 void test_vornQs64 (void)
 {
-  int64x2_t out_int64x2_t;
-  int64x2_t arg0_int64x2_t;
-  int64x2_t arg1_int64x2_t;
 
   out_int64x2_t = vornq_s64 (arg0_int64x2_t, arg1_int64x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicu64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint64x1_t out_uint64x1_t;
+uint64x1_t arg0_uint64x1_t;
+uint64x1_t arg1_uint64x1_t;
 void test_vbicu64 (void)
 {
-  uint64x1_t out_uint64x1_t;
-  uint64x1_t arg0_uint64x1_t;
-  uint64x1_t arg1_uint64x1_t;
 
   out_uint64x1_t = vbic_u64 (arg0_uint64x1_t, arg1_uint64x1_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vorns64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vorns64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int64x1_t out_int64x1_t;
+int64x1_t arg0_int64x1_t;
+int64x1_t arg1_int64x1_t;
 void test_vorns64 (void)
 {
-  int64x1_t out_int64x1_t;
-  int64x1_t arg0_int64x1_t;
-  int64x1_t arg1_int64x1_t;
 
   out_int64x1_t = vorn_s64 (arg0_int64x1_t, arg1_int64x1_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbics8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbics8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int8x8_t out_int8x8_t;
+int8x8_t arg0_int8x8_t;
+int8x8_t arg1_int8x8_t;
 void test_vbics8 (void)
 {
-  int8x8_t out_int8x8_t;
-  int8x8_t arg0_int8x8_t;
-  int8x8_t arg1_int8x8_t;
 
   out_int8x8_t = vbic_s8 (arg0_int8x8_t, arg1_int8x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQu64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint64x2_t out_uint64x2_t;
+uint64x2_t arg0_uint64x2_t;
+uint64x2_t arg1_uint64x2_t;
 void test_vornQu64 (void)
 {
-  uint64x2_t out_uint64x2_t;
-  uint64x2_t arg0_uint64x2_t;
-  uint64x2_t arg1_uint64x2_t;
 
   out_uint64x2_t = vornq_u64 (arg0_uint64x2_t, arg1_uint64x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornu64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint64x1_t out_uint64x1_t;
+uint64x1_t arg0_uint64x1_t;
+uint64x1_t arg1_uint64x1_t;
 void test_vornu64 (void)
 {
-  uint64x1_t out_uint64x1_t;
-  uint64x1_t arg0_uint64x1_t;
-  uint64x1_t arg1_uint64x1_t;
 
   out_uint64x1_t = vorn_u64 (arg0_uint64x1_t, arg1_uint64x1_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbics32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbics32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int32x2_t out_int32x2_t;
+int32x2_t arg0_int32x2_t;
+int32x2_t arg1_int32x2_t;
 void test_vbics32 (void)
 {
-  int32x2_t out_int32x2_t;
-  int32x2_t arg0_int32x2_t;
-  int32x2_t arg1_int32x2_t;
 
   out_int32x2_t = vbic_s32 (arg0_int32x2_t, arg1_int32x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicu8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint8x8_t out_uint8x8_t;
+uint8x8_t arg0_uint8x8_t;
+uint8x8_t arg1_uint8x8_t;
 void test_vbicu8 (void)
 {
-  uint8x8_t out_uint8x8_t;
-  uint8x8_t arg0_uint8x8_t;
-  uint8x8_t arg1_uint8x8_t;
 
   out_uint8x8_t = vbic_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbics16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbics16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int16x4_t out_int16x4_t;
+int16x4_t arg0_int16x4_t;
+int16x4_t arg1_int16x4_t;
 void test_vbics16 (void)
 {
-  int16x4_t out_int16x4_t;
-  int16x4_t arg0_int16x4_t;
-  int16x4_t arg1_int16x4_t;
 
   out_int16x4_t = vbic_s16 (arg0_int16x4_t, arg1_int16x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vorns8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vorns8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int8x8_t out_int8x8_t;
+int8x8_t arg0_int8x8_t;
+int8x8_t arg1_int8x8_t;
 void test_vorns8 (void)
 {
-  int8x8_t out_int8x8_t;
-  int8x8_t arg0_int8x8_t;
-  int8x8_t arg1_int8x8_t;
 
   out_int8x8_t = vorn_s8 (arg0_int8x8_t, arg1_int8x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQs8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int8x16_t out_int8x16_t;
+int8x16_t arg0_int8x16_t;
+int8x16_t arg1_int8x16_t;
 void test_vbicQs8 (void)
 {
-  int8x16_t out_int8x16_t;
-  int8x16_t arg0_int8x16_t;
-  int8x16_t arg1_int8x16_t;
 
   out_int8x16_t = vbicq_s8 (arg0_int8x16_t, arg1_int8x16_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQs32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int32x4_t out_int32x4_t;
+int32x4_t arg0_int32x4_t;
+int32x4_t arg1_int32x4_t;
 void test_vornQs32 (void)
 {
-  int32x4_t out_int32x4_t;
-  int32x4_t arg0_int32x4_t;
-  int32x4_t arg1_int32x4_t;
 
   out_int32x4_t = vornq_s32 (arg0_int32x4_t, arg1_int32x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQs16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int16x8_t out_int16x8_t;
+int16x8_t arg0_int16x8_t;
+int16x8_t arg1_int16x8_t;
 void test_vornQs16 (void)
 {
-  int16x8_t out_int16x8_t;
-  int16x8_t arg0_int16x8_t;
-  int16x8_t arg1_int16x8_t;
 
   out_int16x8_t = vornq_s16 (arg0_int16x8_t, arg1_int16x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicu32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint32x2_t out_uint32x2_t;
+uint32x2_t arg0_uint32x2_t;
+uint32x2_t arg1_uint32x2_t;
 void test_vbicu32 (void)
 {
-  uint32x2_t out_uint32x2_t;
-  uint32x2_t arg0_uint32x2_t;
-  uint32x2_t arg1_uint32x2_t;
 
   out_uint32x2_t = vbic_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQs64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int64x2_t out_int64x2_t;
+int64x2_t arg0_int64x2_t;
+int64x2_t arg1_int64x2_t;
 void test_vbicQs64 (void)
 {
-  int64x2_t out_int64x2_t;
-  int64x2_t arg0_int64x2_t;
-  int64x2_t arg1_int64x2_t;
 
   out_int64x2_t = vbicq_s64 (arg0_int64x2_t, arg1_int64x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornu8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint8x8_t out_uint8x8_t;
+uint8x8_t arg0_uint8x8_t;
+uint8x8_t arg1_uint8x8_t;
 void test_vornu8 (void)
 {
-  uint8x8_t out_uint8x8_t;
-  uint8x8_t arg0_uint8x8_t;
-  uint8x8_t arg1_uint8x8_t;
 
   out_uint8x8_t = vorn_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vorns32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vorns32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int32x2_t out_int32x2_t;
+int32x2_t arg0_int32x2_t;
+int32x2_t arg1_int32x2_t;
 void test_vorns32 (void)
 {
-  int32x2_t out_int32x2_t;
-  int32x2_t arg0_int32x2_t;
-  int32x2_t arg1_int32x2_t;
 
   out_int32x2_t = vorn_s32 (arg0_int32x2_t, arg1_int32x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicu16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint16x4_t out_uint16x4_t;
+uint16x4_t arg0_uint16x4_t;
+uint16x4_t arg1_uint16x4_t;
 void test_vbicu16 (void)
 {
-  uint16x4_t out_uint16x4_t;
-  uint16x4_t arg0_uint16x4_t;
-  uint16x4_t arg1_uint16x4_t;
 
   out_uint16x4_t = vbic_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vorns16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vorns16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int16x4_t out_int16x4_t;
+int16x4_t arg0_int16x4_t;
+int16x4_t arg1_int16x4_t;
 void test_vorns16 (void)
 {
-  int16x4_t out_int16x4_t;
-  int16x4_t arg0_int16x4_t;
-  int16x4_t arg1_int16x4_t;
 
   out_int16x4_t = vorn_s16 (arg0_int16x4_t, arg1_int16x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQu8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint8x16_t out_uint8x16_t;
+uint8x16_t arg0_uint8x16_t;
+uint8x16_t arg1_uint8x16_t;
 void test_vbicQu8 (void)
 {
-  uint8x16_t out_uint8x16_t;
-  uint8x16_t arg0_uint8x16_t;
-  uint8x16_t arg1_uint8x16_t;
 
   out_uint8x16_t = vbicq_u8 (arg0_uint8x16_t, arg1_uint8x16_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQu32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint32x4_t out_uint32x4_t;
+uint32x4_t arg0_uint32x4_t;
+uint32x4_t arg1_uint32x4_t;
 void test_vornQu32 (void)
 {
-  uint32x4_t out_uint32x4_t;
-  uint32x4_t arg0_uint32x4_t;
-  uint32x4_t arg1_uint32x4_t;
 
   out_uint32x4_t = vornq_u32 (arg0_uint32x4_t, arg1_uint32x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQs8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int8x16_t out_int8x16_t;
+int8x16_t arg0_int8x16_t;
+int8x16_t arg1_int8x16_t;
 void test_vornQs8 (void)
 {
-  int8x16_t out_int8x16_t;
-  int8x16_t arg0_int8x16_t;
-  int8x16_t arg1_int8x16_t;
 
   out_int8x16_t = vornq_s8 (arg0_int8x16_t, arg1_int8x16_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQu16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint16x8_t out_uint16x8_t;
+uint16x8_t arg0_uint16x8_t;
+uint16x8_t arg1_uint16x8_t;
 void test_vornQu16 (void)
 {
-  uint16x8_t out_uint16x8_t;
-  uint16x8_t arg0_uint16x8_t;
-  uint16x8_t arg1_uint16x8_t;
 
   out_uint16x8_t = vornq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQu64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint64x2_t out_uint64x2_t;
+uint64x2_t arg0_uint64x2_t;
+uint64x2_t arg1_uint64x2_t;
 void test_vbicQu64 (void)
 {
-  uint64x2_t out_uint64x2_t;
-  uint64x2_t arg0_uint64x2_t;
-  uint64x2_t arg1_uint64x2_t;
 
   out_uint64x2_t = vbicq_u64 (arg0_uint64x2_t, arg1_uint64x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornu32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint32x2_t out_uint32x2_t;
+uint32x2_t arg0_uint32x2_t;
+uint32x2_t arg1_uint32x2_t;
 void test_vornu32 (void)
 {
-  uint32x2_t out_uint32x2_t;
-  uint32x2_t arg0_uint32x2_t;
-  uint32x2_t arg1_uint32x2_t;
 
   out_uint32x2_t = vorn_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornu16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint16x4_t out_uint16x4_t;
+uint16x4_t arg0_uint16x4_t;
+uint16x4_t arg1_uint16x4_t;
 void test_vornu16 (void)
 {
-  uint16x4_t out_uint16x4_t;
-  uint16x4_t arg0_uint16x4_t;
-  uint16x4_t arg1_uint16x4_t;
 
   out_uint16x4_t = vorn_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vornQu8.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint8x16_t out_uint8x16_t;
+uint8x16_t arg0_uint8x16_t;
+uint8x16_t arg1_uint8x16_t;
 void test_vornQu8 (void)
 {
-  uint8x16_t out_uint8x16_t;
-  uint8x16_t arg0_uint8x16_t;
-  uint8x16_t arg1_uint8x16_t;
 
   out_uint8x16_t = vornq_u8 (arg0_uint8x16_t, arg1_uint8x16_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQs32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int32x4_t out_int32x4_t;
+int32x4_t arg0_int32x4_t;
+int32x4_t arg1_int32x4_t;
 void test_vbicQs32 (void)
 {
-  int32x4_t out_int32x4_t;
-  int32x4_t arg0_int32x4_t;
-  int32x4_t arg1_int32x4_t;
 
   out_int32x4_t = vbicq_s32 (arg0_int32x4_t, arg1_int32x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQs16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int16x8_t out_int16x8_t;
+int16x8_t arg0_int16x8_t;
+int16x8_t arg1_int16x8_t;
 void test_vbicQs16 (void)
 {
-  int16x8_t out_int16x8_t;
-  int16x8_t arg0_int16x8_t;
-  int16x8_t arg1_int16x8_t;
 
   out_int16x8_t = vbicq_s16 (arg0_int16x8_t, arg1_int16x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQu32.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint32x4_t out_uint32x4_t;
+uint32x4_t arg0_uint32x4_t;
+uint32x4_t arg1_uint32x4_t;
 void test_vbicQu32 (void)
 {
-  uint32x4_t out_uint32x4_t;
-  uint32x4_t arg0_uint32x4_t;
-  uint32x4_t arg1_uint32x4_t;
 
   out_uint32x4_t = vbicq_u32 (arg0_uint32x4_t, arg1_uint32x4_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbicQu16.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+uint16x8_t out_uint16x8_t;
+uint16x8_t arg0_uint16x8_t;
+uint16x8_t arg1_uint16x8_t;
 void test_vbicQu16 (void)
 {
-  uint16x8_t out_uint16x8_t;
-  uint16x8_t arg0_uint16x8_t;
-  uint16x8_t arg1_uint16x8_t;
 
   out_uint16x8_t = vbicq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/neon/vbics64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vbics64.c
@@ -3,16 +3,16 @@
 
 /* { dg-do assemble } */
 /* { dg-require-effective-target arm_neon_ok } */
-/* { dg-options "-save-temps -O0" } */
+/* { dg-options "-save-temps -O2" } */
 /* { dg-add-options arm_neon } */
 
 #include "arm_neon.h"
 
+int64x1_t out_int64x1_t;
+int64x1_t arg0_int64x1_t;
+int64x1_t arg1_int64x1_t;
 void test_vbics64 (void)
 {
-  int64x1_t out_int64x1_t;
-  int64x1_t arg0_int64x1_t;
-  int64x1_t arg1_int64x1_t;
 
   out_int64x1_t = vbic_s64 (arg0_int64x1_t, arg1_int64x1_t);
 }
--- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c
@@ -3,7 +3,7 @@
 /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
 /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
 
-extern void baz (float);
+extern void bar (float);
 
 void
 foo (float *p, float a, int n)
@@ -13,4 +13,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fldmdbs" } } */
+/* { dg-final { scan-assembler "vldmdb.32" } } */
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m0plus.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0plus.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -Os" } */
+
+int
+test (int a)
+{
+  return a * 0x13;
+}
+
+/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/pr60606-4.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+int
+f (void)
+{
+  register unsigned int r[50] asm ("r1"); /* { dg-error "suitable for a register" } */
+  return r[1];
+}
--- a/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c
+++ b/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */
 /* { dg-options "-O1" } */
 
 unsigned long long or64 (unsigned long long input)
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m0.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -O2" } */
+
+int
+test (int a)
+{
+  return a * 0x123456;
+}
+
+/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/pr58784.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr58784.c
@@ -11,6 +11,9 @@
     char stepsRemoved;
     ptp_tlv_t tlv[1];
 } ptp_message_announce_t;
+
+extern void f (ptp_message_announce_t *);
+
 int ptplib_send_announce(int sequenceId, int i)
 {
     ptp_message_announce_t tx_packet;
--- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+typedef long long s64int;
+typedef int s32int;
+typedef unsigned long long u64int;
+typedef unsigned int u32int;
+
+s64int
+iordi_di_notdi (s64int a, s64int b)
+{
+  return (a | ~b);
+}
+
+s64int
+iordi_di_notzesidi (s64int a, u32int b)
+{
+  return (a | ~(u64int) b);
+}
+
+s64int
+iordi_notdi_zesidi (s64int a, u32int b)
+{
+  return (~a | (u64int) b);
+}
+
+s64int
+iordi_di_notsesidi (s64int a, s32int b)
+{
+  return (a | ~(s64int) b);
+}
+
+int main ()
+{
+  s64int a64 = 0xdeadbeef00000000ll;
+  s64int b64 = 0x000000004f4f0112ll;
+  s64int c64 = 0xdeadbeef000f0000ll;
+
+  u32int c32 = 0x01124f4f;
+  s32int d32 = 0xabbaface;
+
+  s64int z = iordi_di_notdi (a64, b64);
+  if (z != 0xffffffffb0b0feedll)
+    abort ();
+
+  z = iordi_di_notzesidi (a64, c32);
+  if (z != 0xfffffffffeedb0b0ll)
+    abort ();
+
+  z = iordi_notdi_zesidi (c64, c32);
+  if (z != 0x21524110fff2ffffll)
+    abort ();
+
+  z = iordi_di_notsesidi (a64, d32);
+  if (z != 0xdeadbeef54450531ll)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c
@@ -3,7 +3,7 @@
 /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
 /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
 
-extern void baz (float);
+extern void bar (float);
 
 void
 foo (float *p, float a, int n)
@@ -13,4 +13,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fldmias" } } */
+/* { dg-final { scan-assembler "vldmia.32" } } */
--- a/src/gcc/testsuite/gcc.target/arm/cold-lc.c
+++ b/src/gcc/testsuite/gcc.target/arm/cold-lc.c
@@ -7,6 +7,7 @@
     struct task_struct *task;
 };
 extern struct thread_info *current_thread_info (void);
+extern int show_stack (struct task_struct *, unsigned long *);
 
 void dump_stack (void)
 {
--- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c
@@ -13,4 +13,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fldmdbd" } } */
+/* { dg-final { scan-assembler "vldmdb.64" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c
@@ -12,4 +12,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fstmdbs" } } */
+/* { dg-final { scan-assembler "vstmdb.32" } } */
--- a/src/gcc/testsuite/gcc.target/arm/lp1243022.c
+++ b/src/gcc/testsuite/gcc.target/arm/lp1243022.c
@@ -47,6 +47,7 @@
 				 union xhci_trb * trb);
 struct xhci_segment *trb_in_td (struct xhci_segment *start_seg,
 				dma_addr_t suspect_dma);
+int
 xhci_test_trb_in_td (struct xhci_hcd *xhci, struct xhci_segment *input_seg,
 		     union xhci_trb *start_trb, union xhci_trb *end_trb,
 		     dma_addr_t input_dma, struct xhci_segment *result_seg,
@@ -64,6 +65,7 @@
                   "Expected seg %p, got seg %p\n", result_seg, seg);
     }
 }
+int
 xhci_check_trb_in_td_math (struct xhci_hcd *xhci, gfp_t mem_flags)
 {
     struct
--- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c
@@ -13,4 +13,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fldmiad" } } */
+/* { dg-final { scan-assembler "vldmia.64" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c
@@ -12,4 +12,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fstmias" } } */
+/* { dg-final { scan-assembler "vstmia.32" } } */
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m0.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
+
+int
+test (int a)
+{
+  return a * 0x123456;
+}
+
+/* { dg-final { scan-assembler "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c
@@ -12,4 +12,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fstmdbd" } } */
+/* { dg-final { scan-assembler "vstmdb.64" } } */
--- a/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_vfp_ok } */
+/* { dg-options "-O2 -march=armv8-a" } */
+/* { dg-add-options arm_v8_vfp } */
+
+int
+foofloat (float x)
+{
+  return __builtin_lceilf (x);
+}
+
+/* { dg-final { scan-assembler-times "vcvtp.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */
+
+
+int
+foodouble (double x)
+{
+  return __builtin_lceil (x);
+}
+
+/* { dg-final { scan-assembler-times "vcvtp.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c
@@ -12,4 +12,4 @@
   while (n--);
 }
 
-/* { dg-final { scan-assembler "fstmiad" } } */
+/* { dg-final { scan-assembler "vstmia.64" } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzips16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzips16.x"
+
+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrns16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrns16.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
@@ -0,0 +1,12 @@
+/* Test the `vexts64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_s64.x"
+
+/* Don't scan assembler for vext - it can be optimized into a move from r0.  */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipu16.x"
+
+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQs8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqs8.x"
+
+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_u8.x"
+
+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnu16.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQs8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqs8.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqf32.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextu64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_u64.x"
+
+/* Don't scan assembler for vext - it can be optimized into a move from r0.  */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_p8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qp8.x"
+
+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqp8.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32p8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32p8.x"
+
+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_u8.x"
+
+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQs64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_s64.x"
+
+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_p16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qp16.x"
+
+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQs16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqs16.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrns8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrns8.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_s32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qs32.x"
+
+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQu64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_u64.x"
+
+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqu16.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64s8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64s8.x"
+
+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_u32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qu32.x"
+
+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqp16.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_p16.x"
+
+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQs32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqs32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vexts32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_s32.x"
+
+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqu32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzps8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzps8.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_u32.x"
+
+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32s16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32s16.x"
+
+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqp8.x"
+
+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqp8.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32q_s8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32qs8.x"
+
+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32u16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32u16.x"
+
+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64p16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64p16.x"
+
+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64s32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64s32.x"
+
+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev16q_s8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev16qs8.x"
+
+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
@@ -0,0 +1,35 @@
+# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARM target.
+if ![istarget arm*-*-*] then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+	"" ""
+
+# All done.
+dg-finish
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64u32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64u32.x"
+
+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_u8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qu8.x"
+
+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpp16.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzps32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzps32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpu32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_p16.x"
+
+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQs32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_s32.x"
+
+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32q_p16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32qp16.x"
+
+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqp16.x"
+
+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQs32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqs32.x"
+
+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_u32.x"
+
+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnp8.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqu8.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzips8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzips8.x"
+
+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqu32.x"
+
+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev16s8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev16s8.x"
+
+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32u8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32u8.x"
+
+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64p8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64p8.x"
+
+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpp8.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipp16.x"
+
+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzips32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzips32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
@@ -0,0 +1,26 @@
+/* Test the `vextp64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_crypto } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly64x1_t in1 = {0};
+  poly64x1_t in2 = {1};
+  poly64x1_t actual = vext_p64 (in1, in2, 0);
+  if (actual != in1)
+    abort ();
+
+  return 0;
+}
+
+/* Don't scan assembler for vext - it can be optimized into a move from r0.
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32q_p8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32qp8.x"
+
+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnp16.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrns32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrns32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQs8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_s8.x"
+
+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev16q_p8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev16qp8.x"
+
+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipu32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnu32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqu8.x"
+
+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqu8.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_f32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qf32.x"
+
+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqf32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipp8.x"
+
+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_f32.x"
+
+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
@@ -0,0 +1,33 @@
+/* Test the `vextQp64' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_crypto } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+poly64x2_t
+test_vextq_p64_1 (poly64x2_t a, poly64x2_t b)
+{
+  return vextq_p64(a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  poly64x2_t in1 = {0, 1};
+  poly64x2_t in2 = {2, 3};
+  poly64x2_t actual = test_vextq_p64_1 (in1, in2);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != i + 1)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vexts8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_s8.x"
+
+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev16p8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev16p8.x"
+
+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQp16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqp16.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQs32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqs32.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnQu32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnqu32.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnu8.x"
+
+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_s16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qs16.x"
+
+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64f32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64f32.x"
+
+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64u8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64u8.x"
+
+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_u16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qu16.x"
+
+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32p16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32p16.x"
+
+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_p8.x"
+
+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpf32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQs16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqs16.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vexts16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_s16.x"
+
+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqu16.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpu8.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_f32.x"
+
+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_u16.x"
+
+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqf32.x"
+
+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32q_u8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32qu8.x"
+
+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64s16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64s16.x"
+
+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev16q_u8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev16qu8.x"
+
+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64u16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64u16.x"
+
+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev64q_s8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev64qs8.x"
+
+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextp8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/ext_p8.x"
+
+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzps16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzps16.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpQs8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpqs8.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vuzpu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vuzpu16.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQs16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_s16.x"
+
+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32s8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32s8.x"
+
+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32q_s16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32qs16.x"
+
+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vextQu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/extq_u16.x"
+
+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipf32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQs16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqs16.x"
+
+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
@@ -0,0 +1,12 @@
+/* Test the `vtrnf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vtrnf32.x"
+
+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev32q_u16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev32qu16.x"
+
+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipQu16' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipqu16.x"
+
+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vzipu8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -O1 -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vzipu8.x"
+
+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
@@ -0,0 +1,12 @@
+/* Test the `vrev16u8' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -fno-inline" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include "../../aarch64/simd/vrev16u8.x"
+
+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
+/* { dg-add-options arm_v8_neon } */
+
+#define N 32
+
+float __attribute__((aligned(16))) input[N];
+int __attribute__((aligned(16))) output[N];
+
+void
+foo ()
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = __builtin_lfloorf (input[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.target/arm/README.advsimd-intrinsics
+++ b/src/gcc/testsuite/gcc.target/arm/README.advsimd-intrinsics
@@ -0,0 +1 @@
+Advanced SIMD intrinsics tests are located in gcc.target/aarch64.
--- a/src/gcc/testsuite/gcc.target/arm/pr51835.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr51835.c
@@ -13,5 +13,5 @@
   return (unsigned int)d;
 }
 
-/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */
-/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! arm_little_endian } } } } */
+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */
+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! arm_little_endian } } } } */
--- a/src/gcc/testsuite/gcc.target/arm/20031108-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/20031108-1.c
@@ -20,6 +20,9 @@
 
 Rec_Pointer Ptr_Glob;
 
+extern int Proc_7 (int, int, int *);
+
+void
 Proc_1 (Ptr_Val_Par)
     Rec_Pointer Ptr_Val_Par;
 {
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m1.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m1.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m1.small-multiply -mthumb -O2" } */
+
+int
+test (int a)
+{
+  return a * 0x123456;
+}
+
+/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m0.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
+
+int
+test (int a)
+{
+  return a * 0x13;
+}
+
+/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c
@@ -11,6 +11,8 @@
 
 #define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5)
 
+extern void foo (int *, int *);
+
 void
 bar (uint32_t *ptr, int y)
 {
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
@@ -5,8 +5,11 @@
 
 #define N 32
 
+float __attribute__((aligned(16))) input[N];
+float __attribute__((aligned(16))) output[N];
+
 void
-foo (float *output, float *input)
+foo ()
 {
   int i = 0;
   /* Vectorizable.  */
--- a/src/gcc/testsuite/gcc.target/arm/pr43920-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr43920-2.c
@@ -4,6 +4,8 @@
 
 #include <stdio.h>
 
+extern int lseek(int, long, int);
+
 int getFileStartAndLength (int fd, int *start_, size_t *length_)
 {
       int start, end;
--- a/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c
+++ b/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */
 /* { dg-options "-O1" } */
 
 unsigned long long xor64 (unsigned long long input)
--- a/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
+/* { dg-add-options arm_v8_neon } */
+
+#define N 32
+
+float __attribute__((aligned(16))) input[N];
+int __attribute__((aligned(16))) output[N];
+
+void
+foo ()
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = __builtin_lroundf (input[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m1.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m1.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m1.small-multiply -mthumb -Os" } */
+
+int
+test (int a)
+{
+  return a * 0x123456;
+}
+
+/* { dg-final { scan-assembler "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c
+++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c
@@ -0,0 +1,12 @@
+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */
+/* { dg-options "-O2 -march=armv5te -marm" } */
+/* { dg-final { scan-assembler "bx" } } */
+/* { dg-final { scan-assembler-not "blx" } } */
+
+int lcal (int) __attribute__ ((long_call));
+
+int
+dec (int a)
+{
+  return lcal (a);
+}
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
@@ -5,8 +5,11 @@
 
 #define N 32
 
+float __attribute__((aligned(16))) input[N];
+float __attribute__((aligned(16))) output[N];
+
 void
-foo (float *output, float *input)
+foo ()
 {
   int i = 0;
   /* Vectorizable.  */
--- a/src/gcc/testsuite/gcc.target/arm/pr61948.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr61948.c
@@ -0,0 +1,16 @@
+/* PR target/61948 */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+/* { dg-options "-O2 -mthumb" } */
+/* { dg-add-options arm_neon } */
+
+long long f (long long *c)
+{
+  long long t = c[0];
+  asm ("nop" : : : "r0", "r3", "r4", "r5",
+		   "r6", "r7", "r8", "r9",
+		   "r10", "r11", "r12", "memory");
+  return t >> 1;
+}
+
--- a/src/gcc/testsuite/gcc.target/arm/pr51968.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr51968.c
@@ -1,6 +1,6 @@
 /* PR target/51968 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */
+/* { dg-options "-O2 -Wno-implicit-function-declaration -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */
 /* { dg-require-effective-target arm_neon_ok } */
 
 typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8)));
--- a/src/gcc/testsuite/gcc.target/arm/vnmul-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/vnmul-1.c
@@ -6,7 +6,7 @@
 double
 foo_d (double a, double b)
 {
-  /* { dg-final { scan-assembler "fnmuld" } } */
+  /* { dg-final { scan-assembler "vnmul\\.f64" } } */
   return -a * b;
 }
 
@@ -13,6 +13,6 @@
 float
 foo_s (float a, float b)
 {
-  /* { dg-final { scan-assembler "fnmuls" } } */
+  /* { dg-final { scan-assembler "vnmul\\.f32" } } */
   return -a * b;
 }
--- a/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_vfp_ok } */
+/* { dg-options "-O2 -march=armv8-a -ffast-math" } */
+/* { dg-add-options arm_v8_vfp } */
+
+int
+foofloat (float x)
+{
+  return __builtin_lroundf (x);
+}
+
+/* { dg-final { scan-assembler-times "vcvta.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */
+
+
+int
+foodouble (double x)
+{
+  return __builtin_lround (x);
+}
+
+/* { dg-final { scan-assembler-times "vcvta.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/pr64460_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr64460_1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=xscale" } */
+
+typedef unsigned int size_t;
+typedef short unsigned int __uint16_t;
+typedef long unsigned int __uint32_t;
+typedef unsigned int __uintptr_t;
+typedef __uint16_t uint16_t ;
+typedef __uint32_t uint32_t ;
+typedef __uintptr_t uintptr_t;
+typedef uint32_t Objects_Id;
+typedef uint16_t Objects_Maximum;
+typedef struct { } Objects_Control;
+
+static __inline__ void *_Addresses_Align_up (void *address, size_t alignment)
+{
+	uintptr_t mask = alignment - (uintptr_t)1;
+	return (void*)(((uintptr_t)address + mask) & ~mask);
+}
+
+typedef struct {
+	Objects_Id minimum_id;
+	Objects_Maximum maximum;
+	_Bool
+		auto_extend;
+	Objects_Maximum allocation_size;
+	void **object_blocks;
+} Objects_Information;
+
+extern uint32_t _Objects_Get_index (Objects_Id);
+extern void** _Workspace_Allocate (size_t);
+
+void _Objects_Extend_information (Objects_Information *information)
+{
+	uint32_t block_count;
+	uint32_t minimum_index;
+	uint32_t maximum;
+	size_t block_size;
+	_Bool
+		do_extend =
+		minimum_index = _Objects_Get_index( information->minimum_id );
+	if ( information->object_blocks ==
+			((void *)0)
+	   )
+		block_count = 0;
+	else {
+		block_count = information->maximum / information->allocation_size;
+	}
+	if ( do_extend ) {
+		void **object_blocks;
+		uintptr_t object_blocks_size;
+		uintptr_t inactive_per_block_size;
+		object_blocks_size = (uintptr_t)_Addresses_Align_up(
+				(void*)(block_count * sizeof(void*)),
+				8
+				);
+		inactive_per_block_size =
+			(uintptr_t)_Addresses_Align_up(
+					(void*)(block_count * sizeof(uint32_t)),
+					8
+					);
+		block_size = object_blocks_size + inactive_per_block_size +
+			((maximum + minimum_index) * sizeof(Objects_Control *));
+		if ( information->auto_extend ) {
+			object_blocks = _Workspace_Allocate( block_size );
+		} else {
+		}
+	}
+}
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m1.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m1.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m1.small-multiply -mthumb -Os" } */
+
+int
+test (int a)
+{
+  return a * 0x13;
+}
+
+/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h
@@ -49,6 +49,7 @@
 
 
 extern void abort (void);
+extern int memcmp (const void *s1, const void *s2, __SIZE_TYPE__ n);
 
 __attribute__((naked))  void dumpregs () __asm("myfunc");
 __attribute__((naked))  void dumpregs ()
--- a/src/gcc/testsuite/gcc.target/arm/pr60650.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr60650.c
@@ -20,6 +20,10 @@
 int a, c, d;
 long long e;
 
+extern int foo1 (struct btrfs_root *, int, int, int);
+extern int foo2 (struct btrfs_root *, int, int);
+
+int
 truncate_one_csum (struct btrfs_root *p1, long long p2, long long p3)
 {
   int f, g, i = p1->fs_info->sb->s_blocksize_bits;
--- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c
@@ -11,40 +11,40 @@
 
 void test_sf() {
   /* abssf2_vfp */
-  /* { dg-final { scan-assembler "fabss" } } */
+  /* { dg-final { scan-assembler "vabs.f32" } } */
   f1 = fabsf (f1);
   /* negsf2_vfp */
-  /* { dg-final { scan-assembler "fnegs" } } */
+  /* { dg-final { scan-assembler "vneg.f32" } } */
   f1 = -f1;
   /* addsf3_vfp */
-  /* { dg-final { scan-assembler "fadds" } } */
+  /* { dg-final { scan-assembler "vadd.f32" } } */
   f1 = f2 + f3;
   /* subsf3_vfp */
-  /* { dg-final { scan-assembler "fsubs" } } */
+  /* { dg-final { scan-assembler "vsub.f32" } } */
   f1 = f2 - f3;
   /* divsf3_vfp */
-  /* { dg-final { scan-assembler "fdivs" } } */
+  /* { dg-final { scan-assembler "vdiv.f32" } } */
   f1 = f2 / f3;
   /* mulsf3_vfp */
-  /* { dg-final { scan-assembler "fmuls" } } */
+  /* { dg-final { scan-assembler "vmul.f32" } } */
   f1 = f2 * f3;
   /* mulsf3negsf_vfp */
-  /* { dg-final { scan-assembler "fnmuls" } } */
+  /* { dg-final { scan-assembler "vnmul.f32" } } */
   f1 = -f2 * f3;
   /* mulsf3addsf_vfp */
-  /* { dg-final { scan-assembler "fmacs" } } */
+  /* { dg-final { scan-assembler "vmla.f32" } } */
   f1 = f2 * f3 + f1;
   /* mulsf3subsf_vfp */
-  /* { dg-final { scan-assembler "fmscs" } } */
+  /* { dg-final { scan-assembler "vnmls.f32" } } */
   f1 = f2 * f3 - f1;
   /* mulsf3negsfaddsf_vfp */
-  /* { dg-final { scan-assembler "fnmacs" } } */
+  /* { dg-final { scan-assembler "vmls.f32" } } */
   f1 = f2 - f3 * f1;
   /* mulsf3negsfsubsf_vfp */
-  /* { dg-final { scan-assembler "fnmscs" } } */
+  /* { dg-final { scan-assembler "vnmla.f32" } } */
   f1 = -f2 * f3 - f1;
   /* sqrtsf2_vfp */
-  /* { dg-final { scan-assembler "fsqrts" } } */
+  /* { dg-final { scan-assembler "vsqrt.f32" } } */
   f1 = sqrtf (f1);
 }
 
@@ -52,40 +52,40 @@
 
 void test_df() {
   /* absdf2_vfp */
-  /* { dg-final { scan-assembler "fabsd" } } */
+  /* { dg-final { scan-assembler "vabs.f64" } } */
   d1 = fabs (d1);
   /* negdf2_vfp */
-  /* { dg-final { scan-assembler "fnegd" } } */
+  /* { dg-final { scan-assembler "vneg.f64" } } */
   d1 = -d1;
   /* adddf3_vfp */
-  /* { dg-final { scan-assembler "faddd" } } */
+  /* { dg-final { scan-assembler "vadd.f64" } } */
   d1 = d2 + d3;
   /* subdf3_vfp */
-  /* { dg-final { scan-assembler "fsubd" } } */
+  /* { dg-final { scan-assembler "vsub.f64" } } */
   d1 = d2 - d3;
   /* divdf3_vfp */
-  /* { dg-final { scan-assembler "fdivd" } } */
+  /* { dg-final { scan-assembler "vdiv.f64" } } */
   d1 = d2 / d3;
   /* muldf3_vfp */
-  /* { dg-final { scan-assembler "fmuld" } } */
+  /* { dg-final { scan-assembler "vmul.f64" } } */
   d1 = d2 * d3;
   /* muldf3negdf_vfp */
-  /* { dg-final { scan-assembler "fnmuld" } } */
+  /* { dg-final { scan-assembler "vnmul.f64" } } */
   d1 = -d2 * d3;
   /* muldf3adddf_vfp */
-  /* { dg-final { scan-assembler "fmacd" } } */
+  /* { dg-final { scan-assembler "vmla.f64" } } */
   d1 = d2 * d3 + d1;
   /* muldf3subdf_vfp */
-  /* { dg-final { scan-assembler "fmscd" } } */
+  /* { dg-final { scan-assembler "vnmls.f64" } } */
   d1 = d2 * d3 - d1;
   /* muldf3negdfadddf_vfp */
-  /* { dg-final { scan-assembler "fnmacd" } } */
+  /* { dg-final { scan-assembler "vmls.f64" } } */
   d1 = d2 - d3 * d1;
   /* muldf3negdfsubdf_vfp */
-  /* { dg-final { scan-assembler "fnmscd" } } */
+  /* { dg-final { scan-assembler "vnmla.f64" } } */
   d1 = -d2 * d3 - d1;
   /* sqrtdf2_vfp */
-  /* { dg-final { scan-assembler "fsqrtd" } } */
+  /* { dg-final { scan-assembler "vsqrt.f64" } } */
   d1 = sqrt (d1);
 }
 
@@ -94,46 +94,46 @@
 
 void test_convert () {
   /* extendsfdf2_vfp */
-  /* { dg-final { scan-assembler "fcvtds" } } */
+  /* { dg-final { scan-assembler "vcvt.f64.f32" } } */
   d1 = f1;
   /* truncdfsf2_vfp */
-  /* { dg-final { scan-assembler "fcvtsd" } } */
+  /* { dg-final { scan-assembler "vcvt.f32.f64" } } */
   f1 = d1;
   /* truncsisf2_vfp */
-  /* { dg-final { scan-assembler "ftosizs" } } */
+  /* { dg-final { scan-assembler "vcvt.s32.f32" } } */
   i1 = f1;
   /* truncsidf2_vfp */
-  /* { dg-final { scan-assembler "ftosizd" } } */
+  /* { dg-final { scan-assembler "vcvt.s32.f64" } } */
   i1 = d1;
   /* fixuns_truncsfsi2 */
-  /* { dg-final { scan-assembler "ftouizs" } } */
+  /* { dg-final { scan-assembler "vcvt.u32.f32" } } */
   u1 = f1;
   /* fixuns_truncdfsi2 */
-  /* { dg-final { scan-assembler "ftouizd" } } */
+  /* { dg-final { scan-assembler "vcvt.u32.f64" } } */
   u1 = d1;
   /* floatsisf2_vfp */
-  /* { dg-final { scan-assembler "fsitos" } } */
+  /* { dg-final { scan-assembler "vcvt.f32.s32" } } */
   f1 = i1;
   /* floatsidf2_vfp */
-  /* { dg-final { scan-assembler "fsitod" } } */
+  /* { dg-final { scan-assembler "vcvt.f64.s32" } } */
   d1 = i1;
   /* floatunssisf2 */
-  /* { dg-final { scan-assembler "fuitos" } } */
+  /* { dg-final { scan-assembler "vcvt.f32.u32" } } */
   f1 = u1;
   /* floatunssidf2 */
-  /* { dg-final { scan-assembler "fuitod" } } */
+  /* { dg-final { scan-assembler "vcvt.f64.u32" } } */
   d1 = u1;
 }
 
 void test_ldst (float f[], double d[]) {
-  /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */
-  /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
+  /* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #1020\\\]" } } */
+  /* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
   /* { dg-final { scan-assembler "add.+ r0, #1024" } } */
-  /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\]\\\]\n" } } */
+  /* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */
   f[256] = f[255] + f[-255];
 
-  /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */
-  /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
-  /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */
+  /* { dg-final { scan-assembler "vldr.64.+ \\\[r1, #1016\\\]" } } */
+  /* { dg-final { scan-assembler "vldr.64.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
+  /* { dg-final { scan-assembler "vstr.64.+ \\\[r1, #256\\\]" } } */
   d[32] = d[127] + d[-127];
 }
--- a/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-add-options "arm_neon" } */
+
+extern void abort ();
+
+#define N 16
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+	      -12.5f, -15.6f, -18.7f, -21.8f,
+	      24.9f, 27.1f, 30.2f, 33.3f,
+	      36.4f, 39.5f, 42.6f, 45.7f};
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+	      -9.0f, 1.0f, -2.0f, 3.0f,
+	      -4.0f, -5.0f, 6.0f, 7.0f,
+	      -8.0f, -9.0f, 10.0f, 11.0f};
+float r[N];
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    r[i] = __builtin_copysignf (a[i], b[i]);
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    if (r[i] != __builtin_copysignf (a[i], b[i]))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vnmul-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/vnmul-2.c
@@ -6,7 +6,7 @@
 double
 foo_d (double a, double b)
 {
-  /* { dg-final { scan-assembler-not "fnmuld" } } */
+  /* { dg-final { scan-assembler-not "vnmul\\.f64" } } */
   return -a * b;
 }
 
@@ -13,6 +13,6 @@
 float
 foo_s (float a, float b)
 {
-  /* { dg-final { scan-assembler-not "fnmuls" } } */
+  /* { dg-final { scan-assembler-not "vnmul\\.f32" } } */
   return -a * b;
 }
--- a/src/gcc/testsuite/gcc.target/arm/rev16.c
+++ b/src/gcc/testsuite/gcc.target/arm/rev16.c
@@ -0,0 +1,35 @@
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef unsigned int __u32;
+
+__u32
+__rev16_32_alt (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
+         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
+}
+
+__u32
+__rev16_32 (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
+         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
+}
+
+int
+main (void)
+{
+  volatile __u32 in32 = 0x12345678;
+  volatile __u32 expected32 = 0x34127856;
+
+  if (__rev16_32 (in32) != expected32)
+    abort ();
+
+  if (__rev16_32_alt (in32) != expected32)
+    abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+typedef long long s64int;
+typedef int s32int;
+typedef unsigned long long u64int;
+typedef unsigned int u32int;
+
+s64int
+anddi_di_notdi (s64int a, s64int b)
+{
+  return (a & ~b);
+}
+
+s64int
+anddi_di_notzesidi (s64int a, u32int b)
+{
+  return (a & ~(u64int) b);
+}
+
+s64int
+anddi_notdi_zesidi (s64int a, u32int b)
+{
+  return (~a & (u64int) b);
+}
+
+s64int
+anddi_di_notsesidi (s64int a, s32int b)
+{
+  return (a & ~(s64int) b);
+}
+
+int main ()
+{
+  s64int a64 = 0xdeadbeef0000ffffll;
+  s64int b64 = 0x000000005f470112ll;
+  s64int c64 = 0xdeadbeef300f0000ll;
+
+  u32int c32 = 0x01124f4f;
+  s32int d32 = 0xabbaface;
+
+  s64int z = anddi_di_notdi (c64, b64);
+  if (z != 0xdeadbeef20080000ll)
+    abort ();
+
+  z = anddi_di_notzesidi (a64, c32);
+  if (z != 0xdeadbeef0000b0b0ll)
+    abort ();
+
+  z = anddi_notdi_zesidi (c64, c32);
+  if (z != 0x0000000001104f4fll)
+    abort ();
+
+  z = anddi_di_notsesidi (a64, d32);
+  if (z != 0x0000000000000531ll)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bic\t" 6 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m0plus.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0plus.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -O2" } */
+
+int
+test (int a)
+{
+  return a * 0x123456;
+}
+
+/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vnmul-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/vnmul-3.c
@@ -6,7 +6,7 @@
 double
 foo_d (double a, double b)
 {
-  /* { dg-final { scan-assembler "fnmuld" } } */
+  /* { dg-final { scan-assembler "vnmul\\.f64" } } */
   return -(a * b);
 }
 
@@ -13,6 +13,6 @@
 float
 foo_s (float a, float b)
 {
-  /* { dg-final { scan-assembler "fnmuls" } } */
+  /* { dg-final { scan-assembler "vnmul\\.f32" } } */
   return -(a * b);
 }
--- a/src/gcc/testsuite/gcc.target/arm/pr63210.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr63210.c
@@ -0,0 +1,12 @@
+/* { dg-do assemble } */
+/* { dg-options "-mthumb -Os " }  */
+/* { dg-require-effective-target arm_thumb1_ok } */
+
+int foo1 (int c);
+int foo2 (int c);
+
+int test (int c)
+{
+  return (foo1 (c) || foo2 (c));
+}
+/* { dg-final { object-size text <= 28 } } */
--- a/src/gcc/testsuite/gcc.target/arm/pr60606-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+int
+f (void)
+{
+  register unsigned pc asm ("pc"); /* { dg-error "not general enough" } */
+  
+  return pc > 0x12345678;
+}
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
@@ -5,8 +5,11 @@
 
 #define N 32
 
+float __attribute__((aligned(16))) input[N];
+float __attribute__((aligned(16))) output[N];
+
 void
-foo (float *output, float *input)
+foo ()
 {
   int i = 0;
   /* Vectorizable.  */
--- a/src/gcc/testsuite/gcc.target/arm/pr60650-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr60650-2.c
@@ -4,17 +4,19 @@
 int a, h, j;
 long long d, e, i;
 int f;
+int
 fn1 (void *p1, int p2)
 {
     switch (p2)
     case 8:
 {
-    register b = *(long long *) p1, c asm ("r2");
+    register int b = *(long long *) p1, c asm ("r2");
     asm ("%0": "=r" (a), "=r" (c):"r" (b), "r" (0));
     *(long long *) p1 = c;
     }
 }
 
+int
 fn2 ()
 {
     int k;
@@ -27,8 +29,8 @@
         case 0:
         (
         {
-            register l asm ("r4");
-            register m asm ("r0");
+            register int l asm ("r4");
+            register int m asm ("r0");
             asm ("  .err  .endif\n\t": "=r" (h), "=r" (j):"r" (m),
             "r"
             (l));;
--- a/src/gcc/testsuite/gcc.target/arm/pr55642.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr55642.c
@@ -2,6 +2,8 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_thumb2_ok } */
 
+extern int abs (int);
+
 int
 foo (int v)
 {
--- a/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c
+++ b/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_vfp_ok } */
+/* { dg-options "-O2 -march=armv8-a" } */
+/* { dg-add-options arm_v8_vfp } */
+
+int
+foofloat (float x)
+{
+  return __builtin_lfloorf (x);
+}
+
+/* { dg-final { scan-assembler-times "vcvtm.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */
+
+
+int
+foodouble (double x)
+{
+  return __builtin_lfloor (x);
+}
+
+/* { dg-final { scan-assembler-times "vcvtm.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "Test is specific to cortex-m0plus.small-multiply" { arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0plus.small-multiply" } } */
+/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -Os" } */
+
+int
+test (int a)
+{
+  return a * 0x123456;
+}
+
+/* { dg-final { scan-assembler "\[\\t \]+mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vnmul-4.c
+++ b/src/gcc/testsuite/gcc.target/arm/vnmul-4.c
@@ -6,7 +6,7 @@
 double
 foo_d (double a, double b)
 {
-  /* { dg-final { scan-assembler "fnmuld" } } */
+  /* { dg-final { scan-assembler "vnmul\\.f64" } } */
   return -(a * b);
 }
 
@@ -13,6 +13,6 @@
 float
 foo_s (float a, float b)
 {
-  /* { dg-final { scan-assembler "fnmuls" } } */
+  /* { dg-final { scan-assembler "vnmul\\.f32" } } */
   return -(a * b);
 }
--- a/src/gcc/testsuite/gcc.target/arm/pr60606-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+int
+f (void)
+{
+  register unsigned int r asm ("cc"); /* { dg-error "not general enough|suitable for data type" } */
+  return r;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
@@ -0,0 +1,19 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * withoug outgoing.
+     * total frame size <= 256.
+     * number of callee-save reg == 1.
+     * optimized code should use "str !" for stack adjustment.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test1, 200, )
+t_frame_run (test1)
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c
@@ -0,0 +1,17 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * with outgoing.
+     * total frame size > 512.
+       area except outgoing <= 512
+     * number of callee-saved reg = 1.
+     * Split stack adjustment into two subtractions.
+       the first subtractions couldn't be optimized
+       into "str !" as it's > 256.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test9, 480, , 24, a[8], a[9], a[10])
+t_frame_run (test9)
--- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define VARIANTS(VARIANT, STRUCT)	\
+VARIANT (uint8, , 8, _u8, 6, STRUCT)	\
+VARIANT (uint16, , 4, _u16, 3, STRUCT)	\
+VARIANT (uint32, , 2, _u32, 1, STRUCT)	\
+VARIANT (uint64, , 1, _u64, 0, STRUCT)	\
+VARIANT (int8, , 8, _s8, 5, STRUCT)	\
+VARIANT (int16, , 4, _s16, 2, STRUCT)	\
+VARIANT (int32, , 2, _s32, 0, STRUCT)	\
+VARIANT (int64, , 1, _s64, 0, STRUCT)	\
+VARIANT (poly8, , 8, _p8, 7, STRUCT)	\
+VARIANT (poly16, , 4, _p16, 1, STRUCT)	\
+VARIANT (float32, , 2, _f32, 1, STRUCT)	\
+VARIANT (float64, , 1, _f64, 0, STRUCT)	\
+VARIANT (uint8, q, 16, _u8, 14, STRUCT)	\
+VARIANT (uint16, q, 8, _u16, 4, STRUCT)	\
+VARIANT (uint32, q, 4, _u32, 3, STRUCT)	\
+VARIANT (uint64, q, 2, _u64, 0, STRUCT)	\
+VARIANT (int8, q, 16, _s8, 13, STRUCT)	\
+VARIANT (int16, q, 8, _s16, 6, STRUCT)	\
+VARIANT (int32, q, 4, _s32, 2, STRUCT)	\
+VARIANT (int64, q, 2, _s64, 1, STRUCT)	\
+VARIANT (poly8, q, 16, _p8, 12, STRUCT)	\
+VARIANT (poly16, q, 8, _p16, 5, STRUCT)	\
+VARIANT (float32, q, 4, _f32, 1, STRUCT)\
+VARIANT (float64, q, 2, _f64, 0, STRUCT)
+
+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT)			\
+int									\
+test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data,		\
+				     const BASE##_t *overwrite)		\
+{									\
+  BASE##x##ELTS##x##STRUCT##_t vectors;					\
+  BASE##_t temp[ELTS];							\
+  int i,j;								\
+  for (i = 0; i < STRUCT; i++, data += ELTS)				\
+    vectors.val[i] = vld1##Q##SUFFIX (data);				\
+  vectors = vld##STRUCT##Q##_lane##SUFFIX (overwrite, vectors, LANE);	\
+  while (--i >= 0)							\
+    {									\
+      vst1##Q##SUFFIX (temp, vectors.val[i]);				\
+      data -= ELTS; /* Point at value loaded before vldN_lane.  */	\
+      for (j = 0; j < ELTS; j++)					\
+        if (temp[j] != (j == LANE ? overwrite[i] : data[j]))		\
+          return 1;							\
+    }									\
+  return 0;								\
+}
+
+
+/* Tests of vld2_dup and vld2q_dup.  */
+VARIANTS (TESTMETH, 2)
+/* Tests of vld3_dup and vld3q_dup.  */
+VARIANTS (TESTMETH, 3)
+/* Tests of vld4_dup and vld4q_dup.  */
+VARIANTS (TESTMETH, 4)
+
+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT)			\
+  if (test_vld##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data,	\
+						BASE##_data) != 0)	\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  /* Original data for all vector formats.  */
+  uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
+			   0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
+			   0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
+			   0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
+
+  /* Data with which vldN_lane will overwrite some of previous.  */
+  uint8_t uint8_data[4] = { 7, 11, 13, 17 };
+  uint16_t uint16_data[4] = { 257, 263, 269, 271 };
+  uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 };
+  uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL,
+			      0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+  int8_t int8_data[4] = { -1, 3, -5, 7 };
+  int16_t int16_data[4] = { 257, -259, 261, -263 };
+  int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 };
+  int64_t *int64_data = (int64_t *)uint64_data;
+  poly8_t poly8_data[4] = { 0, 7, 13, 18, };
+  poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+  float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
+  float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
+
+  VARIANTS (CHECK, 2);
+  VARIANTS (CHECK, 3);
+  VARIANTS (CHECK, 4);
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX)	\
+int __attribute__ ((noinline))			\
+test_vld##STRUCT##SUFFIX ()			\
+{						\
+  BASE##_t data[ELTS * STRUCT];			\
+  BASE##_t temp[ELTS];				\
+  BASE##x##ELTS##x##STRUCT##_t vectors;		\
+  int i,j;					\
+  for (i = 0; i < STRUCT * ELTS; i++)		\
+    data [i] = (BASE##_t) 2*i + 1;		\
+  asm volatile ("" : : : "memory");		\
+  vectors = vld##STRUCT##SUFFIX (data);		\
+  for (i = 0; i < STRUCT; i++)			\
+    {						\
+      vst1##SUFFIX (temp, vectors.val[i]);	\
+      asm volatile ("" : : : "memory");		\
+      for (j = 0; j < ELTS; j++)		\
+        if (temp[j] != data[i + STRUCT*j])	\
+          return 1;				\
+    }						\
+  return 0;					\
+}
+
+#define VARIANTS(VARIANT, STRUCT)	\
+VARIANT (uint8, 8, STRUCT, _u8)		\
+VARIANT (uint16, 4, STRUCT, _u16)	\
+VARIANT (uint32, 2, STRUCT, _u32)	\
+VARIANT (uint64, 1, STRUCT, _u64)	\
+VARIANT (int8, 8, STRUCT, _s8)		\
+VARIANT (int16, 4, STRUCT, _s16)	\
+VARIANT (int32, 2, STRUCT, _s32)	\
+VARIANT (int64, 1, STRUCT, _s64)	\
+VARIANT (poly8, 8, STRUCT, _p8)		\
+VARIANT (poly16, 4, STRUCT, _p16)	\
+VARIANT (float32, 2, STRUCT, _f32)	\
+VARIANT (float64, 1, STRUCT, _f64)	\
+VARIANT (uint8, 16, STRUCT, q_u8)	\
+VARIANT (uint16, 8, STRUCT, q_u16)	\
+VARIANT (uint32, 4, STRUCT, q_u32)	\
+VARIANT (uint64, 2, STRUCT, q_u64)	\
+VARIANT (int8, 16, STRUCT, q_s8)	\
+VARIANT (int16, 8, STRUCT, q_s16)	\
+VARIANT (int32, 4, STRUCT, q_s32)	\
+VARIANT (int64, 2, STRUCT, q_s64)	\
+VARIANT (poly8, 16, STRUCT, q_p8)	\
+VARIANT (poly16, 8, STRUCT, q_p16)	\
+VARIANT (float32, 4, STRUCT, q_f32)	\
+VARIANT (float64, 2, STRUCT, q_f64)
+
+/* Tests of vld2 and vld2q.  */
+VARIANTS (TESTMETH, 2)
+
+/* Tests of vld3 and vld3q.  */
+VARIANTS (TESTMETH, 3)
+
+/* Tests of vld4 and vld4q.  */
+VARIANTS (TESTMETH, 4)
+
+#define CHECK(BASE, ELTS, STRUCT, SUFFIX)	\
+  if (test_vld##STRUCT##SUFFIX () != 0)		\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  VARIANTS (CHECK, 2)
+  VARIANTS (CHECK, 3)
+  VARIANTS (CHECK, 4)
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
@@ -0,0 +1,54 @@
+/* Test vqabs_s64 intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vqabs_s64 (int64x1_t passed, int64_t expected)
+{
+  return vget_lane_s64 (vqabs_s64 (passed), 0) != expected;
+}
+
+int __attribute__ ((noinline))
+test_vqabsd_s64 (int64_t passed, int64_t expected)
+{
+  return vqabsd_s64 (passed) != expected;
+}
+
+/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */
+
+int
+main (int argc, char **argv)
+{
+  /* Basic test.  */
+  if (test_vqabs_s64 (vcreate_s64 (-1), 1))
+    abort ();
+  if (test_vqabsd_s64 (-1, 1))
+    abort ();
+
+  /* Getting absolute value of min int64_t.
+     Note, exact result cannot be represented in int64_t,
+     so max int64_t is expected.  */
+  if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff))
+    abort ();
+  if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff))
+    abort ();
+
+  /* Another input that gets max int64_t.  */
+  if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff))
+    abort ();
+  if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff))
+    abort ();
+
+  /* Checking that large positive numbers stay the same.  */
+  if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff))
+    abort ();
+  if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff))
+    abort ();
+
+  return 0;
+}
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/eon_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/eon_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* { dg-final { scan-assembler-not "\tf?mov\t" } } */
+
+typedef long long int64_t;
+typedef int64_t int64x1_t __attribute__ ((__vector_size__ (8)));
+
+/* { dg-final { scan-assembler-times "\\teon\\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 1 } } */
+
+int64_t
+test_eon (int64_t a, int64_t b)
+{
+  return a ^ ~b;
+}
+
+/* { dg-final { scan-assembler-times "\\tmvn\\tx\[0-9\]+, x\[0-9\]+" 1 } } */
+int64_t
+test_not (int64_t a)
+{
+  return ~a;
+}
+
+/* There is no eon for SIMD regs; we prefer eor+mvn to mov+mov+eon+mov.  */
+
+/* { dg-final { scan-assembler-times "\\teor\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+/* { dg-final { scan-assembler-times "\\tmvn\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 2 } } */
+int64x1_t
+test_vec_eon (int64x1_t a, int64x1_t b)
+{
+  return a ^ ~b;
+}
+
+int64x1_t
+test_vec_not (int64x1_t a)
+{
+  return ~a;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp
@@ -0,0 +1,35 @@
+# Copyright (C) 2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if ![istarget aarch64*-*-*] then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+	"" ""
+
+# All done.
+dg-finish
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c
@@ -0,0 +1,15 @@
+/* Test the crc32b ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32b (uint32_t arg0, uint8_t arg1)
+{
+  return __crc32b (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32b\tw..?, w..?, w..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c
@@ -0,0 +1,15 @@
+/* Test the crc32d ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32d (uint32_t arg0, uint64_t arg1)
+{
+  return __crc32d (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32x\tw..?, w..?, x..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c
@@ -0,0 +1,15 @@
+/* Test the crc32cb ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32cb (uint32_t arg0, uint8_t arg1)
+{
+  return __crc32cb (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32cb\tw..?, w..?, w..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c
@@ -0,0 +1,15 @@
+/* Test the crc32cd ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32cd (uint32_t arg0, uint64_t arg1)
+{
+  return __crc32cd (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32cx\tw..?, w..?, x..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c
@@ -0,0 +1,15 @@
+/* Test the crc32w ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32w (uint32_t arg0, uint32_t arg1)
+{
+  return __crc32w (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32w\tw..?, w..?, w..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c
@@ -0,0 +1,15 @@
+/* Test the crc32h ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32h (uint32_t arg0, uint16_t arg1)
+{
+  return __crc32h (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32h\tw..?, w..?, w..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c
@@ -0,0 +1,15 @@
+/* Test the crc32cw ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32cw (uint32_t arg0, uint32_t arg1)
+{
+  return __crc32cw (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32cw\tw..?, w..?, w..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c
@@ -0,0 +1,15 @@
+/* Test the crc32ch ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
+
+#include "arm_acle.h"
+
+uint32_t
+test_crc32ch (uint32_t arg0, uint16_t arg1)
+{
+  return __crc32ch (arg0, arg1);
+}
+
+/* { dg-final { scan-assembler "crc32ch\tw..?, w..?, w..?\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
@@ -0,0 +1,18 @@
+/* Verify:
+     * without outgoing.
+     * total frame size > 512.
+     * number of callee-save reg >= 2.
+     * split the stack adjustment into two substractions,
+       the second could be optimized into "stp !".  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test13, 700, )
+t_frame_run (test13)
+
+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/lr_free_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/lr_free_1.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2 -fomit-frame-pointer -ffixed-x2 -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 -ffixed-x7 -ffixed-x8 -ffixed-x9 -ffixed-x10 -ffixed-x11 -ffixed-x12 -ffixed-x13 -ffixed-x14 -ffixed-x15 -ffixed-x16 -ffixed-x17 -ffixed-x18 -ffixed-x19 -ffixed-x20 -ffixed-x21 -ffixed-x22 -ffixed-x23 -ffixed-x24 -ffixed-x25 -ffixed-x26 -ffixed-x27 -ffixed-28 -ffixed-29 --save-temps -mgeneral-regs-only -fno-ipa-cp" } */
+
+extern void abort ();
+
+int
+dec (int a, int b)
+{
+  return a + b;
+}
+
+int
+cal (int a, int b)
+{
+  int sum1 = a * b;
+  int sum2 = a / b;
+  int sum = dec (sum1, sum2);
+  return a + b + sum + sum1 + sum2;
+}
+
+int
+main (int argc, char **argv)
+{
+  int ret = cal (2, 1);
+
+  if (ret != 11)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { scan-assembler "str\tw30, \\\[sp, \[0-9\]+\\\]" } } */
+
+/* { dg-final { scan-assembler "ldr\tw30, \\\[sp, \[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/fuse_adrp_add_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/fuse_adrp_add_1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a57" } */
+
+enum reg_class { NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS,
+                 XGRF_REGS, ALL_REGS, LIM_REG_CLASSES };
+
+enum rtx_code { REG,  LAST_AND_UNUSED_RTX_CODE };
+
+typedef union rtunion_def
+{
+  int rtint;
+} rtunion;
+
+typedef struct rtx_def
+{
+  unsigned int volatil : 1;
+  rtunion fld[1];
+} *rtx;
+
+extern char fixed_regs[64];
+extern char global_regs[64];
+
+int
+rtx_cost (rtx x, int outer_code)
+{
+  register enum rtx_code code;
+  switch (code)
+    {
+      case REG:
+        return ! ((((x)->volatil) && ((x)->fld[0].rtint) < 64)
+                 || ((((x)->fld[0].rtint)) == 30 || (((x)->fld[0].rtint)) == 30
+                 || (((x)->fld[0].rtint)) == 31 || (((x)->fld[0].rtint)) == 0
+                 || ((((x)->fld[0].rtint)) >= (64)
+                     && (((x)->fld[0].rtint)) <= (((64)) + 3))
+                 || ((((x)->fld[0].rtint)) < 64 && ((((x)->fld[0].rtint)) == 30
+                 || (((x)->fld[0].rtint)) == 30 || fixed_regs[((x)->fld[0].rtint)]
+                 || global_regs[((x)->fld[0].rtint)])
+                    && ((((x)->fld[0].rtint))
+                          ? ((((x)->fld[0].rtint) < 32)
+                             ? GENERAL_REGS : XRF_REGS)
+                          : AP_REG) != NO_REGS)));
+    }
+}
+
+/* { dg-final { scan-assembler "adrp\tx.*, fixed_regs\n\tadd\tx.*, x.*fixed_regs" } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
@@ -0,0 +1,20 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * without outgoing.
+     * total frame size <= 256.
+     * number of callee-save regs >= 2.
+     * optimized code should use "stp !" for stack adjustment.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test2, 200, "x19")
+t_frame_run (test2)
+
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand" } */
+
+extern void initialize_array (unsigned char *, int);
+
+int
+test15 (void)
+{
+  unsigned char a[480];
+
+  initialize_array (a, 480);
+
+  if (a[0] == 0x10)
+    return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump "\\(mem\[^\\n\]*\\(plus\[^\\n\]*virtual-stack-vars" "expand" } } */
+
+/* { dg-final { cleanup-rtl-dump "expand" } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c
@@ -0,0 +1,596 @@
+/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define ABS(a) __builtin_fabs (a)
+#define ISNAN(a) __builtin_isnan (a)
+
+#define DOUBLE_EQUALS(a, b, epsilon)		\
+(						\
+ ((a) == (b))					\
+  || (ISNAN (a) && ISNAN (b))			\
+  || (ABS (a - b) < epsilon)			\
+)
+
+/* Pi accurate up to 16 digits.
+   Further digits are a closest binary approximation.  */
+#define PI_F64 3.14159265358979311599796346854
+/* Hex representation in Double (IEEE754 Double precision 64-bit) is:
+   0x400921FB54442D18.  */
+
+/* E accurate up to 16 digits.
+   Further digits are a closest binary approximation.  */
+#define E_F64 2.71828182845904509079559829843
+/* Hex representation in Double (IEEE754 Double precision 64-bit) is:
+   0x4005BF0A8B145769.  */
+
+float32x2_t __attribute__ ((noinline))
+wrap_vreinterpret_f32_f64 (float64x1_t __a)
+{
+  return vreinterpret_f32_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_f32_f64 ()
+{
+  float64x1_t a;
+  float32x2_t b;
+  float64_t c[1] = { PI_F64 };
+  /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32.  */
+  float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 };
+  float32_t e[2];
+  int i;
+
+  a = vld1_f64 (c);
+  b = wrap_vreinterpret_f32_f64 (a);
+  vst1_f32 (e, b);
+  for (i = 0; i < 2; i++)
+    if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__))
+      return 1;
+  return 0;
+};
+
+int8x8_t __attribute__ ((noinline))
+wrap_vreinterpret_s8_f64 (float64x1_t __a)
+{
+  return vreinterpret_s8_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_s8_f64 ()
+{
+  float64x1_t a;
+  int8x8_t b;
+  float64_t c[1] = { PI_F64 };
+  int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 };
+  int8_t e[8];
+  int i;
+
+  a = vld1_f64 (c);
+  b = wrap_vreinterpret_s8_f64 (a);
+  vst1_s8 (e, b);
+  for (i = 0; i < 8; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+int16x4_t __attribute__ ((noinline))
+wrap_vreinterpret_s16_f64 (float64x1_t __a)
+{
+  return vreinterpret_s16_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_s16_f64 ()
+{
+  float64x1_t a;
+  int16x4_t b;
+  float64_t c[1] = { PI_F64 };
+  int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 };
+  int16_t e[4];
+  int i;
+
+  a = vld1_f64 (c);
+  b = wrap_vreinterpret_s16_f64 (a);
+  vst1_s16 (e, b);
+  for (i = 0; i < 4; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+int32x2_t __attribute__ ((noinline))
+wrap_vreinterpret_s32_f64 (float64x1_t __a)
+{
+  return vreinterpret_s32_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_s32_f64 ()
+{
+  float64x1_t a;
+  int32x2_t b;
+  float64_t c[1] = { PI_F64 };
+  int32_t d[2] = { 0x54442D18, 0x400921FB };
+  int32_t e[2];
+  int i;
+
+  a = vld1_f64 (c);
+  b = wrap_vreinterpret_s32_f64 (a);
+  vst1_s32 (e, b);
+  for (i = 0; i < 2; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+int64x1_t __attribute__ ((noinline))
+wrap_vreinterpret_s64_f64 (float64x1_t __a)
+{
+  return vreinterpret_s64_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_s64_f64 ()
+{
+  float64x1_t a;
+  int64x1_t b;
+  float64_t c[1] = { PI_F64 };
+  int64_t d[1] = { 0x400921FB54442D18 };
+  int64_t e[1];
+  int i;
+
+  a = vld1_f64 (c);
+  b = wrap_vreinterpret_s64_f64 (a);
+  vst1_s64 (e, b);
+  if (d[0] != e[0])
+    return 1;
+  return 0;
+};
+
+float32x4_t __attribute__ ((noinline))
+wrap_vreinterpretq_f32_f64 (float64x2_t __a)
+{
+  return vreinterpretq_f32_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_f32_f64 ()
+{
+  float64x2_t a;
+  float32x4_t b;
+  float64_t c[2] = { PI_F64, E_F64 };
+
+  /* Values corresponding to f32 reinterpret of
+     { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }.  */
+  float32_t d[4] = { 3.3702805504E12,
+		     2.1426990032196044921875E0,
+		     -2.8569523269651966444143014594E-32,
+		     2.089785099029541015625E0 };
+  float32_t e[4];
+  int i;
+
+  a = vld1q_f64 (c);
+  b = wrap_vreinterpretq_f32_f64 (a);
+  vst1q_f32 (e, b);
+  for (i = 0; i < 4; i++)
+    {
+      if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__))
+	return 1;
+    }
+  return 0;
+};
+
+int8x16_t __attribute__ ((noinline))
+wrap_vreinterpretq_s8_f64 (float64x2_t __a)
+{
+  return vreinterpretq_s8_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_s8_f64 ()
+{
+  float64x2_t a;
+  int8x16_t b;
+  float64_t c[2] = { PI_F64, E_F64 };
+  int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40,
+		   0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 };
+  int8_t e[16];
+  int i;
+
+  a = vld1q_f64 (c);
+  b = wrap_vreinterpretq_s8_f64 (a);
+  vst1q_s8 (e, b);
+  for (i = 0; i < 16; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+int16x8_t __attribute__ ((noinline))
+wrap_vreinterpretq_s16_f64 (float64x2_t __a)
+{
+  return vreinterpretq_s16_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_s16_f64 ()
+{
+  float64x2_t a;
+  int16x8_t b;
+  float64_t c[2] = { PI_F64, E_F64 };
+  int16_t d[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009,
+		   0x5769, 0x8B14, 0xBF0A, 0x4005 };
+  int16_t e[8];
+  int i;
+
+  a = vld1q_f64 (c);
+  b = wrap_vreinterpretq_s16_f64 (a);
+  vst1q_s16 (e, b);
+  for (i = 0; i < 8; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+int32x4_t __attribute__ ((noinline))
+wrap_vreinterpretq_s32_f64 (float64x2_t __a)
+{
+  return vreinterpretq_s32_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_s32_f64 ()
+{
+  float64x2_t a;
+  int32x4_t b;
+  float64_t c[2] = { PI_F64, E_F64 };
+  int32_t d[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A };
+  int32_t e[4];
+  int i;
+
+  a = vld1q_f64 (c);
+  b = wrap_vreinterpretq_s32_f64 (a);
+  vst1q_s32 (e, b);
+  for (i = 0; i < 4; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+int64x2_t __attribute__ ((noinline))
+wrap_vreinterpretq_s64_f64 (float64x2_t __a)
+{
+  return vreinterpretq_s64_f64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_s64_f64 ()
+{
+  float64x2_t a;
+  int64x2_t b;
+  float64_t c[2] = { PI_F64, E_F64 };
+  int64_t d[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 };
+  int64_t e[2];
+  int i;
+
+  a = vld1q_f64 (c);
+  b = wrap_vreinterpretq_s64_f64 (a);
+  vst1q_s64 (e, b);
+  for (i = 0; i < 2; i++)
+    if (d[i] != e[i])
+      return 1;
+  return 0;
+};
+
+float64x1_t __attribute__ ((noinline))
+wrap_vreinterpret_f64_f32 (float32x2_t __a)
+{
+  return vreinterpret_f64_f32 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_f64_f32 ()
+{
+  float32x2_t a;
+  float64x1_t b;
+  /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32.  */
+  float32_t c[2] = { 3.3702805504E12, 2.1426990032196044921875E0 };
+  float64_t d[1] = { PI_F64 };
+  float64_t e[1];
+  int i;
+
+  a = vld1_f32 (c);
+  b = wrap_vreinterpret_f64_f32 (a);
+  vst1_f64 (e, b);
+  if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
+    return 1;
+  return 0;
+};
+
+float64x1_t __attribute__ ((noinline))
+wrap_vreinterpret_f64_s8 (int8x8_t __a)
+{
+  return vreinterpret_f64_s8 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_f64_s8 ()
+{
+  int8x8_t a;
+  float64x1_t b;
+  int8_t c[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 };
+  float64_t d[1] = { PI_F64 };
+  float64_t e[1];
+  int i;
+
+  a = vld1_s8 (c);
+  b = wrap_vreinterpret_f64_s8 (a);
+  vst1_f64 (e, b);
+  if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
+    return 1;
+  return 0;
+};
+
+float64x1_t __attribute__ ((noinline))
+wrap_vreinterpret_f64_s16 (int16x4_t __a)
+{
+  return vreinterpret_f64_s16 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_f64_s16 ()
+{
+  int16x4_t a;
+  float64x1_t b;
+  int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 };
+  float64_t d[1] = { PI_F64 };
+  float64_t e[1];
+  int i;
+
+  a = vld1_s16 (c);
+  b = wrap_vreinterpret_f64_s16 (a);
+  vst1_f64 (e, b);
+  if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
+    return 1;
+  return 0;
+};
+
+float64x1_t __attribute__ ((noinline))
+wrap_vreinterpret_f64_s32 (int32x2_t __a)
+{
+  return vreinterpret_f64_s32 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_f64_s32 ()
+{
+  int32x2_t a;
+  float64x1_t b;
+  int32_t c[2] = { 0x54442D18, 0x400921FB };
+  float64_t d[1] = { PI_F64 };
+  float64_t e[1];
+  int i;
+
+  a = vld1_s32 (c);
+  b = wrap_vreinterpret_f64_s32 (a);
+  vst1_f64 (e, b);
+  if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
+    return 1;
+  return 0;
+};
+
+float64x1_t __attribute__ ((noinline))
+wrap_vreinterpret_f64_s64 (int64x1_t __a)
+{
+  return vreinterpret_f64_s64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpret_f64_s64 ()
+{
+  int64x1_t a;
+  float64x1_t b;
+  int64_t c[1] = { 0x400921FB54442D18 };
+  float64_t d[1] = { PI_F64 };
+  float64_t e[1];
+
+  a = vld1_s64 (c);
+  b = wrap_vreinterpret_f64_s64 (a);
+  vst1_f64 (e, b);
+  if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
+    return 1;
+  return 0;
+};
+
+float64x2_t __attribute__ ((noinline))
+wrap_vreinterpretq_f64_f32 (float32x4_t __a)
+{
+  return vreinterpretq_f64_f32 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_f64_f32 ()
+{
+  float32x4_t a;
+  float64x2_t b;
+  /* Values corresponding to f32 reinterpret of
+     { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }.  */
+  float32_t c[4] = { 3.3702805504E12,
+		     2.1426990032196044921875E0,
+		     -2.8569523269651966444143014594E-32,
+		     2.089785099029541015625E0 };
+
+  float64_t d[2] = { PI_F64, E_F64 };
+  float64_t e[2];
+  int i;
+
+  a = vld1q_f32 (c);
+  b = wrap_vreinterpretq_f64_f32 (a);
+  vst1q_f64 (e, b);
+  for (i = 0; i < 2; i++)
+    if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
+      return 1;
+  return 0;
+};
+
+float64x2_t __attribute__ ((noinline))
+wrap_vreinterpretq_f64_s8 (int8x16_t __a)
+{
+  return vreinterpretq_f64_s8 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_f64_s8 ()
+{
+  int8x16_t a;
+  float64x2_t b;
+  int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40,
+		   0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 };
+  float64_t d[2] = { PI_F64, E_F64 };
+  float64_t e[2];
+  int i;
+
+  a = vld1q_s8 (c);
+  b = wrap_vreinterpretq_f64_s8 (a);
+  vst1q_f64 (e, b);
+  for (i = 0; i < 2; i++)
+    if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
+      return 1;
+  return 0;
+};
+
+float64x2_t __attribute__ ((noinline))
+wrap_vreinterpretq_f64_s16 (int16x8_t __a)
+{
+  return vreinterpretq_f64_s16 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_f64_s16 ()
+{
+  int16x8_t a;
+  float64x2_t b;
+  int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009,
+		   0x5769, 0x8B14, 0xBF0A, 0x4005 };
+  float64_t d[2] = { PI_F64, E_F64 };
+  float64_t e[2];
+  int i;
+
+  a = vld1q_s16 (c);
+  b = wrap_vreinterpretq_f64_s16 (a);
+  vst1q_f64 (e, b);
+  for (i = 0; i < 2; i++)
+    if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
+      return 1;
+  return 0;
+};
+
+float64x2_t __attribute__ ((noinline))
+wrap_vreinterpretq_f64_s32 (int32x4_t __a)
+{
+  return vreinterpretq_f64_s32 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_f64_s32 ()
+{
+  int32x4_t a;
+  float64x2_t b;
+  int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A };
+  float64_t d[2] = { PI_F64, E_F64 };
+  float64_t e[2];
+  int i;
+
+  a = vld1q_s32 (c);
+  b = wrap_vreinterpretq_f64_s32 (a);
+  vst1q_f64 (e, b);
+  for (i = 0; i < 2; i++)
+    if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
+      return 1;
+  return 0;
+};
+
+float64x2_t __attribute__ ((noinline))
+wrap_vreinterpretq_f64_s64 (int64x2_t __a)
+{
+  return vreinterpretq_f64_s64 (__a);
+}
+
+int __attribute__ ((noinline))
+test_vreinterpretq_f64_s64 ()
+{
+  int64x2_t a;
+  float64x2_t b;
+  int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 };
+  float64_t d[2] = { PI_F64, E_F64 };
+  float64_t e[2];
+  int i;
+
+  a = vld1q_s64 (c);
+  b = wrap_vreinterpretq_f64_s64 (a);
+  vst1q_f64 (e, b);
+  for (i = 0; i < 2; i++)
+    if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
+      return 1;
+  return 0;
+};
+
+int
+main (int argc, char **argv)
+{
+  if (test_vreinterpret_f32_f64 ())
+    abort ();
+
+  if (test_vreinterpret_s8_f64 ())
+    abort ();
+  if (test_vreinterpret_s16_f64 ())
+    abort ();
+  if (test_vreinterpret_s32_f64 ())
+    abort ();
+  if (test_vreinterpret_s64_f64 ())
+    abort ();
+
+  if (test_vreinterpretq_f32_f64 ())
+    abort ();
+
+  if (test_vreinterpretq_s8_f64 ())
+    abort ();
+  if (test_vreinterpretq_s16_f64 ())
+    abort ();
+  if (test_vreinterpretq_s32_f64 ())
+    abort ();
+  if (test_vreinterpretq_s64_f64 ())
+    abort ();
+
+  if (test_vreinterpret_f64_f32 ())
+    abort ();
+
+  if (test_vreinterpret_f64_s8 ())
+    abort ();
+  if (test_vreinterpret_f64_s16 ())
+    abort ();
+  if (test_vreinterpret_f64_s32 ())
+    abort ();
+  if (test_vreinterpret_f64_s64 ())
+    abort ();
+
+  if (test_vreinterpretq_f64_f32 ())
+    abort ();
+
+  if (test_vreinterpretq_f64_s8 ())
+    abort ();
+  if (test_vreinterpretq_f64_s16 ())
+    abort ();
+  if (test_vreinterpretq_f64_s32 ())
+    abort ();
+  if (test_vreinterpretq_f64_s64 ())
+    abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
@@ -21,6 +21,6 @@
   leaf ();
 }
 
-/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
 
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vect.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x
@@ -2,6 +2,7 @@
 typedef unsigned int *__restrict__ pRUINT;
 typedef long long *__restrict__ pRINT64;
 typedef unsigned long long *__restrict__ pRUINT64;
+extern int abs (int j);
 
 void test_orn (pRUINT a, pRUINT b, pRUINT c)
 {
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c
@@ -0,0 +1,12 @@
+/* Verify:
+     * with outgoing.
+     * total frame size > 512.
+     * number of callee-save reg >= 2.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test14, 700, , 8, a[8])
+t_frame_run (test14)
--- a/src/gcc/testsuite/gcc.target/aarch64/lr_free_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/lr_free_2.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2 -ffixed-x2 -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 -ffixed-x7 -ffixed-x8 -ffixed-x9 -ffixed-x10 -ffixed-x11 -ffixed-x12 -ffixed-x13 -ffixed-x14 -ffixed-x15 -ffixed-x16 -ffixed-x17 -ffixed-x18 -ffixed-x19 -ffixed-x20 -ffixed-x21 -ffixed-x22 -ffixed-x23 -ffixed-x24 -ffixed-x25 -ffixed-x26 -ffixed-x27 -ffixed-x28 --save-temps -mgeneral-regs-only -fno-ipa-cp -fdump-rtl-ira" } */
+
+extern void abort ();
+
+int
+cal (int a, int b)
+{
+  /* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+  int sum = a + b;
+  int sum1 = a * b;
+  /* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
+  /* { dg-final { scan-rtl-dump "assign reg 30" "ira" } } */
+  return (a + b + sum + sum1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int ret = cal (1, 2);
+
+  if (ret != 8)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
+/* { dg-final { cleanup-rtl-dump "ira" } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c
@@ -0,0 +1,14 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * without outgoing.
+     * total frame size <= 512 but > 256.
+     * number of callee-save reg == 1.
+     * we can't use "str !" to optimize stack adjustment.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test3, 400, )
+t_frame_run (test3)
--- a/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
@@ -2,10 +2,13 @@
 /* { dg-do compile } */
 
 extern int __finite (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
+extern int __finitef (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
+extern int __signbit (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
+extern int __signbitf (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
 int
 __ecvt_r (value, ndigit, decpt, sign, buf, len)
      double value;
-     int ndigit, *decpt, *sign;
+     int ndigit, *decpt, *sign, len;
      char *buf;
 {
   if ((sizeof (value) == sizeof (float) ? __finitef (value) : __finite (value)) && value != 0.0)
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c
@@ -0,0 +1,27 @@
+/* Test the vpaddd_s64 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+#define SIZE 6
+
+extern void abort (void);
+
+int64_t in[SIZE] = { -4l, 4l, -2l, 2l, -1l, 1l };
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE / 2; ++i)
+    if (vpaddd_s64 (vld1q_s64 (in + 2 * i)) != 0)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x
@@ -0,0 +1,114 @@
+extern void abort (void);
+
+int16x8_t
+test_vextq_s16_1 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 1);
+}
+
+int16x8_t
+test_vextq_s16_2 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 2);
+}
+
+int16x8_t
+test_vextq_s16_3 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 3);
+}
+
+int16x8_t
+test_vextq_s16_4 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 4);
+}
+
+int16x8_t
+test_vextq_s16_5 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 5);
+}
+
+int16x8_t
+test_vextq_s16_6 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 6);
+}
+
+int16x8_t
+test_vextq_s16_7 (int16x8_t a, int16x8_t b)
+{
+  return vextq_s16 (a, b, 7);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  int16x8_t in1 = vld1q_s16 (arr1);
+  int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
+  int16x8_t in2 = vld1q_s16 (arr2);
+  int16_t exp[8];
+  int16x8_t expected;
+  int16x8_t actual = test_vextq_s16_1 (in1, in2);
+
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 1;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s16_2 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 2;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s16_3 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 3;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s16_4 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 4;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s16_5 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 5;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s16_6 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 6;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s16_7 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 7;
+  expected = vld1q_s16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c
@@ -0,0 +1,27 @@
+/* Test the vpaddd_u64 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+#define SIZE 6
+
+extern void abort (void);
+
+uint64_t in[SIZE] = { 4ul, 4ul, 2ul, 2ul, 1ul, 1ul };
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE / 2; ++i)
+    if (vpaddd_u64 (vld1q_u64 (in + 2 * i)) != 2 * in[2 * i])
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x
@@ -0,0 +1,114 @@
+extern void abort (void);
+
+uint8x8_t
+test_vext_u8_1 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 1);
+}
+
+uint8x8_t
+test_vext_u8_2 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 2);
+}
+
+uint8x8_t
+test_vext_u8_3 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 3);
+}
+
+uint8x8_t
+test_vext_u8_4 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 4);
+}
+
+uint8x8_t
+test_vext_u8_5 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 5);
+}
+
+uint8x8_t
+test_vext_u8_6 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 6);
+}
+
+uint8x8_t
+test_vext_u8_7 (uint8x8_t a, uint8x8_t b)
+{
+  return vext_u8 (a, b, 7);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  uint8x8_t in1 = vld1_u8 (arr1);
+  uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
+  uint8x8_t in2 = vld1_u8 (arr2);
+  uint8_t exp[8];
+  uint8x8_t expected;
+  uint8x8_t actual = test_vext_u8_1 (in1, in2);
+
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 1;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u8_2 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 2;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u8_3 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 3;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u8_4 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 4;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u8_5 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 5;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u8_6 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 6;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u8_7 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 7;
+  expected = vld1_u8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x
@@ -0,0 +1,114 @@
+extern void abort (void);
+
+uint16x8_t
+test_vextq_u16_1 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 1);
+}
+
+uint16x8_t
+test_vextq_u16_2 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 2);
+}
+
+uint16x8_t
+test_vextq_u16_3 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 3);
+}
+
+uint16x8_t
+test_vextq_u16_4 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 4);
+}
+
+uint16x8_t
+test_vextq_u16_5 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 5);
+}
+
+uint16x8_t
+test_vextq_u16_6 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 6);
+}
+
+uint16x8_t
+test_vextq_u16_7 (uint16x8_t a, uint16x8_t b)
+{
+  return vextq_u16 (a, b, 7);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  uint16x8_t in1 = vld1q_u16 (arr1);
+  uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
+  uint16x8_t in2 = vld1q_u16 (arr2);
+  uint16_t exp[8];
+  uint16x8_t expected;
+  uint16x8_t actual = test_vextq_u16_1 (in1, in2);
+
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 1;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u16_2 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 2;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u16_3 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 3;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u16_4 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 4;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u16_5 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 5;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u16_6 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 6;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u16_7 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 7;
+  expected = vld1q_u16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzips16.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+int16x8x2_t
+test_vuzpqs16 (int16x8_t _a, int16x8_t _b)
+{
+  return vuzpq_s16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second));
+  int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
+  int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
+  int16x8_t expect1 = vld1q_s16 (exp1);
+  int16x8_t expect2 = vld1q_s16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqs8.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qp8.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnu16.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+uint16x8x2_t
+test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b)
+{
+  return vuzpq_u16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second));
+  uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
+  uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
+  uint16x8_t expect1 = vld1q_u16 (exp1);
+  uint16x8_t expect2 = vld1q_u16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+uint8x8x2_t
+test_vuzpu8 (uint8x8_t _a, uint8x8_t _b)
+{
+  return vuzp_u8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second));
+  uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
+  uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
+  uint8x8_t expect1 = vld1_u8 (exp1);
+  uint8x8_t expect2 = vld1_u8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextu16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_u16.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQu8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_u8.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint8x16_t
+test_vrev64qu8 (uint8x16_t _arg)
+{
+  return vrev64q_u8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  uint8x16_t reversed = test_vrev64qu8 (inorder);
+  uint8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32p8.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+int32x2x2_t
+test_vuzps32 (int32x2_t _a, int32x2_t _b)
+{
+  return vuzp_s32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32_t first[] = {1, 2};
+  int32_t second[] = {3, 4};
+  int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second));
+  int32_t exp1[] = {1, 3};
+  int32_t exp2[] = {2, 4};
+  int32x2_t expect1 = vld1_s32 (exp1);
+  int32x2_t expect2 = vld1_s32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
@@ -0,0 +1,17 @@
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int64_t arr1[] = {0};
+  int64x1_t in1 = vld1_s64 (arr1);
+  int64_t arr2[] = {1};
+  int64x1_t in2 = vld1_s64 (arr2);
+  int64x1_t actual = vext_s64 (in1, in2, 0);
+  if (actual != in1)
+    abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+uint32x2x2_t
+test_vuzpu32 (uint32x2_t _a, uint32x2_t _b)
+{
+  return vuzp_u32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32_t first[] = {1, 2};
+  uint32_t second[] = {3, 4};
+  uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second));
+  uint32_t exp1[] = {1, 3};
+  uint32_t exp2[] = {2, 4};
+  uint32x2_t expect1 = vld1_u32 (exp1);
+  uint32x2_t expect2 = vld1_u32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
@@ -0,0 +1,17 @@
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint64_t arr1[] = {0};
+  uint64x1_t in1 = vld1_u64 (arr1);
+  uint64_t arr2[] = {1};
+  uint64x1_t in2 = vld1_u64 (arr2);
+  uint64x1_t actual = vext_u64 (in1, in2, 0);
+  if (actual != in1)
+    abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrns8.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqs16.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qs32.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64s8.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int16x8x2_t
+test_vzipqs16 (int16x8_t _a, int16x8_t _b)
+{
+  return vzipq_s16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second));
+  int16x8_t res1 = result.val[0], res2 = result.val[1];
+  int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
+  int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
+  int16x8_t expected1 = vld1q_s16 (exp1);
+  int16x8_t expected2 = vld1q_s16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+float32x2x2_t
+test_vzipf32 (float32x2_t _a, float32x2_t _b)
+{
+  return vzip_f32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32_t first[] = {1, 2};
+  float32_t second[] = {3, 4};
+  float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second));
+  float32x2_t res1 = result.val[0], res2 = result.val[1];
+  float32_t exp1[] = {1, 3};
+  float32_t exp2[] = {2, 4};
+  float32x2_t expected1 = vld1_f32 (exp1);
+  float32x2_t expected2 = vld1_f32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint8x8x2_t
+test_vzipu8 (uint8x8_t _a, uint8x8_t _b)
+{
+  return vzip_u8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second));
+  uint8x8_t res1 = result.val[0], res2 = result.val[1];
+  uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
+  uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
+  uint8x8_t expected1 = vld1_u8 (exp1);
+  uint8x8_t expected2 = vld1_u8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint16x8x2_t
+test_vzipqu16 (uint16x8_t _a, uint16x8_t _b)
+{
+  return vzipq_u16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second));
+  uint16x8_t res1 = result.val[0], res2 = result.val[1];
+  uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
+  uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
+  uint16x8_t expected1 = vld1q_u16 (exp1);
+  uint16x8_t expected2 = vld1q_u16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQs16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_s16.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqp16.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x
@@ -0,0 +1,114 @@
+extern void abort (void);
+
+poly8x8_t
+test_vext_p8_1 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 1);
+}
+
+poly8x8_t
+test_vext_p8_2 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 2);
+}
+
+poly8x8_t
+test_vext_p8_3 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 3);
+}
+
+poly8x8_t
+test_vext_p8_4 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 4);
+}
+
+poly8x8_t
+test_vext_p8_5 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 5);
+}
+
+poly8x8_t
+test_vext_p8_6 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 6);
+}
+
+poly8x8_t
+test_vext_p8_7 (poly8x8_t a, poly8x8_t b)
+{
+  return vext_p8 (a, b, 7);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  poly8x8_t in1 = vld1_p8 (arr1);
+  poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
+  poly8x8_t in2 = vld1_p8 (arr2);
+  poly8_t exp[8];
+  poly8x8_t expected;
+  poly8x8_t actual = test_vext_p8_1 (in1, in2);
+
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 1;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p8_2 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 2;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p8_3 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 3;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p8_4 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 4;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p8_5 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 5;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p8_6 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 6;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p8_7 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 7;
+  expected = vld1_p8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqu32.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32s16.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+poly8x8x2_t
+test_vuzpp8 (poly8x8_t _a, poly8x8_t _b)
+{
+  return vuzp_p8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second));
+  poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
+  poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
+  poly8x8_t expect1 = vld1_p8 (exp1);
+  poly8x8_t expect2 = vld1_p8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqp8.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32q_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32qs8.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64s32.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp
@@ -0,0 +1,45 @@
+#  Specific regression driver for AArch64 SIMD instructions.
+#  Copyright (C) 2014 Free Software Foundation, Inc.
+#  Contributed by ARM Ltd.
+#
+#  This file is part of GCC.
+#
+#  GCC is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 3, or (at your option)
+#  any later version.
+#
+#  GCC is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCC; see the file COPYING3.  If not see
+#  <http://www.gnu.org/licenses/>.  */
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
+	"" $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int16x4x2_t
+test_vtrns16 (int16x4_t _a, int16x4_t _b)
+{
+  return vtrn_s16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16_t first[] = {1, 2, 3, 4};
+  int16_t second[] = {5, 6, 7, 8};
+  int16x4x2_t result = test_vtrns16 (vld1_s16 (first), vld1_s16 (second));
+  int16x4_t res1 = result.val[0], res2 = result.val[1];
+  int16_t exp1[] = {1, 5, 3, 7};
+  int16_t exp2[] = {2, 6, 4, 8};
+  int16x4_t expected1 = vld1_s16 (exp1);
+  int16x4_t expected2 = vld1_s16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qu8.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly8x16_t
+test_vrev64qp8 (poly8x16_t _arg)
+{
+  return vrev64q_p8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  poly8x16_t reversed = test_vrev64qp8 (inorder);
+  poly8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint16x4x2_t
+test_vtrnu16 (uint16x4_t _a, uint16x4_t _b)
+{
+  return vtrn_u16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16_t first[] = {1, 2, 3, 4};
+  uint16_t second[] = {5, 6, 7, 8};
+  uint16x4x2_t result = test_vtrnu16 (vld1_u16 (first), vld1_u16 (second));
+  uint16x4_t res1 = result.val[0], res2 = result.val[1];
+  uint16_t exp1[] = {1, 5, 3, 7};
+  uint16_t exp2[] = {2, 6, 4, 8};
+  uint16x4_t expected1 = vld1_u16 (exp1);
+  uint16x4_t expected2 = vld1_u16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x
@@ -0,0 +1,58 @@
+extern void abort (void);
+
+poly16x4_t
+test_vext_p16_1 (poly16x4_t a, poly16x4_t b)
+{
+  return vext_p16 (a, b, 1);
+}
+
+poly16x4_t
+test_vext_p16_2 (poly16x4_t a, poly16x4_t b)
+{
+  return vext_p16 (a, b, 2);
+}
+
+poly16x4_t
+test_vext_p16_3 (poly16x4_t a, poly16x4_t b)
+{
+  return vext_p16 (a, b, 3);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  poly16_t arr1[] = {0, 1, 2, 3};
+  poly16x4_t in1 = vld1_p16 (arr1);
+  poly16_t arr2[] = {4, 5, 6, 7};
+  poly16x4_t in2 = vld1_p16 (arr2);
+  poly16_t exp[4];
+  poly16x4_t expected;
+  poly16x4_t actual = test_vext_p16_1 (in1, in2);
+
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 1;
+  expected = vld1_p16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p16_2 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 2;
+  expected = vld1_p16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_p16_3 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 3;
+  expected = vld1_p16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpp16.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x
@@ -0,0 +1,29 @@
+extern void abort (void);
+
+uint8x16x2_t
+test_vzipqu8 (uint8x16_t _a, uint8x16_t _b)
+{
+  return vzipq_u8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  uint8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  uint8x16x2_t result = test_vzipqu8 (vld1q_u8 (first), vld1q_u8 (second));
+  uint8x16_t res1 = result.val[0], res2 = result.val[1];
+  uint8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
+  uint8_t exp2[] =
+      {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
+  uint8x16_t expected1 = vld1q_u8 (exp1);
+  uint8x16_t expected2 = vld1q_u8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c
@@ -0,0 +1,11 @@
+/* Test the `vextu64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_u64.x"
+
+/* Do not scan-assembler.  An EXT instruction could be emitted, but would merely
+   return its first argument, so it is legitimate to optimize it out.  */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vqdmulls_laneq_s32.c
@@ -0,0 +1,15 @@
+/* Test the vqdmulls_laneq_s32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int64_t
+t_vqdmulls_laneq_s32 (int32_t a, int32x4_t b)
+{
+  return vqdmulls_laneq_s32 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpu32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32q_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32qp16.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vqshlb_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vqshlb_1.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+int
+main (int argc, char **argv)
+{
+  int8_t arg1 = -1;
+  int8_t arg2 = 127;
+  int8_t exp = -128;
+  int8_t got = vqshlb_s8 (arg1, arg2);
+
+  if (exp != got)
+    abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x
@@ -0,0 +1,58 @@
+extern void abort (void);
+
+float32x4_t
+test_vextq_f32_1 (float32x4_t a, float32x4_t b)
+{
+  return vextq_f32 (a, b, 1);
+}
+
+float32x4_t
+test_vextq_f32_2 (float32x4_t a, float32x4_t b)
+{
+  return vextq_f32 (a, b, 2);
+}
+
+float32x4_t
+test_vextq_f32_3 (float32x4_t a, float32x4_t b)
+{
+  return vextq_f32 (a, b, 3);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  float32_t arr1[] = {0, 1, 2, 3};
+  float32x4_t in1 = vld1q_f32 (arr1);
+  float32_t arr2[] = {4, 5, 6, 7};
+  float32x4_t in2 = vld1q_f32 (arr2);
+  float32_t exp[4];
+  float32x4_t expected;
+  float32x4_t actual = test_vextq_f32_1 (in1, in2);
+
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 1;
+  expected = vld1q_f32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_f32_2 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 2;
+  expected = vld1q_f32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_f32_3 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 3;
+  expected = vld1q_f32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqp16.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnp8.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x
@@ -0,0 +1,227 @@
+extern void abort (void);
+
+uint8x16_t
+test_vextq_u8_1 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 1);
+}
+
+uint8x16_t
+test_vextq_u8_2 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 2);
+}
+
+uint8x16_t
+test_vextq_u8_3 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 3);
+}
+
+uint8x16_t
+test_vextq_u8_4 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 4);
+}
+
+uint8x16_t
+test_vextq_u8_5 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 5);
+}
+
+uint8x16_t
+test_vextq_u8_6 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 6);
+}
+
+uint8x16_t
+test_vextq_u8_7 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 7);
+}
+
+uint8x16_t
+test_vextq_u8_8 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 8);
+}
+
+uint8x16_t
+test_vextq_u8_9 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 9);
+}
+
+uint8x16_t
+test_vextq_u8_10 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 10);
+}
+
+uint8x16_t
+test_vextq_u8_11 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 11);
+}
+
+uint8x16_t
+test_vextq_u8_12 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 12);
+}
+
+uint8x16_t
+test_vextq_u8_13 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 13);
+}
+
+uint8x16_t
+test_vextq_u8_14 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 14);
+}
+
+uint8x16_t
+test_vextq_u8_15 (uint8x16_t a, uint8x16_t b)
+{
+  return vextq_u8 (a, b, 15);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  uint8x16_t in1 = vld1q_u8 (arr1);
+  uint8_t arr2[] =
+      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+  uint8x16_t in2 = vld1q_u8 (arr2);
+  uint8_t exp[16];
+  uint8x16_t expected;
+  uint8x16_t actual = test_vextq_u8_1 (in1, in2);
+
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 1;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_2 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 2;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_3 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 3;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_4 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 4;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_5 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 5;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_6 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 6;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_7 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 7;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_8 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 8;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_9 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 9;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_10 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 10;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_11 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 11;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_12 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 12;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_13 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 13;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_14 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 14;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u8_15 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 15;
+  expected = vld1q_u8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqu32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64p8.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32u8.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev16_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev16s8.x"
+
+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+float32x4x2_t
+test_vuzpqf32 (float32x4_t _a, float32x4_t _b)
+{
+  return vuzpq_f32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32_t first[] = {1, 2, 3, 4};
+  float32_t second[] = {5, 6, 7, 8};
+  float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second));
+  float32_t exp1[] = {1, 3, 5, 7};
+  float32_t exp2[] = {2, 4, 6, 8};
+  float32x4_t expect1 = vld1q_f32 (exp1);
+  float32x4_t expect2 = vld1q_f32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly8x8x2_t
+test_vzipp8 (poly8x8_t _a, poly8x8_t _b)
+{
+  return vzip_p8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  poly8x8x2_t result = test_vzipp8 (vld1_p8 (first), vld1_p8 (second));
+  poly8x8_t res1 = result.val[0], res2 = result.val[1];
+  poly8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
+  poly8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
+  poly8x8_t expected1 = vld1_p8 (exp1);
+  poly8x8_t expected2 = vld1_p8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int32x4x2_t
+test_vtrnqs32 (int32x4_t _a, int32x4_t _b)
+{
+  return vtrnq_s32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32_t first[] = {1, 2, 3, 4};
+  int32_t second[] = {5, 6, 7, 8};
+  int32x4x2_t result = test_vtrnqs32 (vld1q_s32 (first), vld1q_s32 (second));
+  int32x4_t res1 = result.val[0], res2 = result.val[1];
+  int32_t exp1[] = {1, 5, 3, 7};
+  int32_t exp2[] = {2, 6, 4, 8};
+  int32x4_t expected1 = vld1q_s32 (exp1);
+  int32x4_t expected2 = vld1q_s32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c
@@ -0,0 +1,131 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline" } */
+/* Stops the test_xxx methods being inlined into main, thus preventing constant
+   propagation.  */
+
+#include "int_comparisons.x"
+
+extern void abort (void);
+
+#define CHECK2(R0, R1) if (res[0] != R0 || res[1] != R1) abort ()
+
+#define TEST2(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) {			\
+  BASETYPE##_t _a[2] = {2, 3};						\
+  BASETYPE##x2_t a = vld1##SUFFIX (_a);					\
+  BASETYPE##_t _b[2] = {1, 3};						\
+  BASETYPE##x2_t b = vld1##SUFFIX (_b);					\
+  RESTYPE res[2];							\
+  vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); CHECK2 (0, 0);	\
+  vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (b, a)); CHECK2 (-1, 0);	\
+  vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); CHECK2 (0, -1);	\
+  vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (b, a)); CHECK2 (-1, -1);	\
+  vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); CHECK2 (0, -1);	\
+  vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); CHECK2 (-1, -1);	\
+  vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (b, a)); CHECK2 (0, -1);	\
+  vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); CHECK2 (-1, 0);	\
+  vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (b, a)); CHECK2 (0, 0);	\
+  vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); CHECK2 (0, -1);	\
+  vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a + 1, b)); CHECK2 (-1, 0); \
+}
+
+#define CHECK4(T, R0, R1, R2, R3)		\
+  if (res[0] != (T)R0 || res[1] != (T)R1	\
+      || res[2] != (T)R2 || res[3] != (T)R3) abort ()
+
+#define TEST4(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) {	\
+  BASETYPE##_t _a[4] = {1, 2, 3, 4};			\
+  BASETYPE##x4_t a = vld1##SUFFIX (_a);			\
+  BASETYPE##_t _b[4] = {4, 2, 1, 3};			\
+  BASETYPE##x4_t b = vld1##SUFFIX (_b);			\
+  RESTYPE res[4];					\
+  vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b));	\
+  CHECK4 (RESTYPE, -1, 0, 0, 0);			\
+  vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b));	\
+  CHECK4 (RESTYPE, -1, -1, 0, 0);			\
+  vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b));	\
+  CHECK4 (RESTYPE, 0, -1, 0, 0);			\
+  vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b));	\
+  CHECK4 (RESTYPE, 0, -1, -1, -1);			\
+  vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b));	\
+  CHECK4 (RESTYPE, 0, 0, -1, -1);			\
+  vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b));	\
+  CHECK4 (RESTYPE, 0, -1, -1, 0);			\
+}
+
+#define CHECK8(T, R0, R1, R2, R3, R4, R5, R6, R7)			       \
+  if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
+      || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6		       \
+      || res[7] != (T)R7) abort ()
+
+#define TEST8(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) {	\
+  BASETYPE##_t _a[8] = {1, 2, 3, 4, 5, 6, 7, 8};	\
+  BASETYPE##x8_t a = vld1##SUFFIX (_a);			\
+  BASETYPE##_t _b[8] = {4, 2, 1, 3, 2, 6, 8, 9};	\
+  BASETYPE##x8_t b = vld1##SUFFIX (_b);			\
+  RESTYPE res[8];					\
+  vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b));	\
+  CHECK8 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1);		\
+  vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b));	\
+  CHECK8 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1);	\
+  vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b));	\
+  CHECK8 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0);		\
+  vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b));	\
+  CHECK8 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0);	\
+  vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b));	\
+  CHECK8 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0);		\
+  vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b));	\
+  CHECK8 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1);		\
+}
+
+/* 16-way tests use same 8 values twice.  */
+#define CHECK16(T, R0, R1, R2, R3, R4, R5, R6, R7)			       \
+  if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
+      || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6		       \
+      || res[7] != (T)R7 || res[8] != (T)R0 || res[9] != (T)R1		       \
+      || res[10] != (T)R2 || res[11] != (T)R3 || res[12] != (T)R4	       \
+      || res[13] != (T)R5 || res[14] != (T)R6 || res[15] != (T)R7) abort ()
+
+#define TEST16(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) {			  \
+  BASETYPE##_t _a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}; \
+  BASETYPE##x16_t a = vld1##SUFFIX (_a);				  \
+  BASETYPE##_t _b[16] = {4, 2, 1, 3, 2, 6, 8, 9, 4, 2, 1, 3, 2, 6, 8, 9}; \
+  BASETYPE##x16_t b = vld1##SUFFIX (_b);				  \
+  RESTYPE res[16];							  \
+  vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b));			  \
+  CHECK16 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1);				  \
+  vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b));			  \
+  CHECK16 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1);			  \
+  vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b));			  \
+  CHECK16 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0);				  \
+  vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b));			  \
+  CHECK16 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0);			  \
+  vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b));			  \
+  CHECK16 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0);				  \
+  vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b));			  \
+  CHECK16 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1);			  \
+}
+
+int
+main (int argc, char **argv)
+{
+  TEST2 (int32, _s32, uint32_t, _u32);
+  TEST2 (uint32, _u32, uint32_t, _u32);
+  TEST2 (int64, q_s64, uint64_t, q_u64);
+  TEST2 (uint64, q_u64, uint64_t, q_u64);
+
+  TEST4 (int16, _s16, uint16_t, _u16);
+  TEST4 (uint16, _u16, uint16_t, _u16);
+  TEST4 (int32, q_s32, uint32_t, q_u32);
+  TEST4 (uint32, q_u32, uint32_t, q_u32);
+
+  TEST8 (int8, _s8, uint8_t, _u8);
+  TEST8 (uint8, _u8, uint8_t, _u8);
+  TEST8 (int16, q_s16, uint16_t, q_u16);
+  TEST8 (uint16, q_u16, uint16_t, q_u16);
+
+  TEST16 (int8, q_s8, uint8_t, q_u8);
+  TEST16 (uint8, q_u8, uint8_t, q_u8);
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint32x4x2_t
+test_vtrnqu32 (uint32x4_t _a, uint32x4_t _b)
+{
+  return vtrnq_u32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32_t first[] = {1, 2, 3, 4};
+  uint32_t second[] = {5, 6, 7, 8};
+  uint32x4x2_t result = test_vtrnqu32 (vld1q_u32 (first), vld1q_u32 (second));
+  uint32x4_t res1 = result.val[0], res2 = result.val[1];
+  uint32_t exp1[] = {1, 5, 3, 7};
+  uint32_t exp2[] = {2, 6, 4, 8};
+  uint32x4_t expected1 = vld1q_u32 (exp1);
+  uint32x4_t expected2 = vld1q_u32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int32x4_t
+test_vrev64qs32 (int32x4_t _arg)
+{
+  return vrev64q_s32 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32x4_t inorder = {1, 2, 3, 4};
+  int32x4_t reversed = test_vrev64qs32 (inorder);
+  int32x4_t expected = {2, 1, 4, 3};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint8x8x2_t
+test_vtrnu8 (uint8x8_t _a, uint8x8_t _b)
+{
+  return vtrn_u8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  uint8x8x2_t result = test_vtrnu8 (vld1_u8 (first), vld1_u8 (second));
+  uint8x8_t res1 = result.val[0], res2 = result.val[1];
+  uint8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
+  uint8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
+  uint8x8_t expected1 = vld1_u8 (exp1);
+  uint8x8_t expected2 = vld1_u8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint32x4_t
+test_vrev64qu32 (uint32x4_t _arg)
+{
+  return vrev64q_u32 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32x4_t inorder = {1, 2, 3, 4};
+  uint32x4_t reversed = test_vrev64qu32 (inorder);
+  uint32x4_t expected = {2, 1, 4, 3};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQs64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_s64.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x
@@ -0,0 +1,114 @@
+extern void abort (void);
+
+int8x8_t
+test_vext_s8_1 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 1);
+}
+
+int8x8_t
+test_vext_s8_2 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 2);
+}
+
+int8x8_t
+test_vext_s8_3 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 3);
+}
+
+int8x8_t
+test_vext_s8_4 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 4);
+}
+
+int8x8_t
+test_vext_s8_5 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 5);
+}
+
+int8x8_t
+test_vext_s8_6 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 6);
+}
+
+int8x8_t
+test_vext_s8_7 (int8x8_t a, int8x8_t b)
+{
+  return vext_s8 (a, b, 7);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  int8x8_t in1 = vld1_s8 (arr1);
+  int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
+  int8x8_t in2 = vld1_s8 (arr2);
+  int8_t exp[8];
+  int8x8_t expected;
+  int8x8_t actual = test_vext_s8_1 (in1, in2);
+
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 1;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s8_2 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 2;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s8_3 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 3;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s8_4 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 4;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s8_5 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 5;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s8_6 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 6;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s8_7 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 7;
+  expected = vld1_s8 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzips32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnp16.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32q_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32qp8.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnu32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+int8x8x2_t
+test_vuzps8 (int8x8_t _a, int8x8_t _b)
+{
+  return vuzp_s8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second));
+  int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
+  int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
+  int8x8_t expect1 = vld1_s8 (exp1);
+  int8x8_t expect2 = vld1_s8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqu8.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x
@@ -0,0 +1,29 @@
+extern void abort (void);
+
+poly8x16x2_t
+test_vzipqp8 (poly8x16_t _a, poly8x16_t _b)
+{
+  return vzipq_p8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  poly8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second));
+  poly8x16_t res1 = result.val[0], res2 = result.val[1];
+  poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
+  poly8_t exp2[] =
+      {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
+  poly8x16_t expected1 = vld1q_p8 (exp1);
+  poly8x16_t expected2 = vld1q_p8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextp16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_p16.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int16x4_t
+test_vrev32s16 (int16x4_t _arg)
+{
+  return vrev32_s16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16x4_t inorder = {1, 2, 3, 4};
+  int16x4_t reversed = test_vrev32s16 (inorder);
+  int16x4_t expected = {2, 1, 4, 3};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint16x4_t
+test_vrev32u16 (uint16x4_t _arg)
+{
+  return vrev32_u16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16x4_t inorder = {1, 2, 3, 4};
+  uint16x4_t reversed = test_vrev32u16 (inorder);
+  uint16x4_t expected = {2, 1, 4, 3};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly16x4_t
+test_vrev64p16 (poly16x4_t _arg)
+{
+  return vrev64_p16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16x4_t inorder = {1, 2, 3, 4};
+  poly16x4_t reversed = test_vrev64p16 (inorder);
+  poly16x4_t expected = {4, 3, 2, 1};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qf32.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+float32x4x2_t
+test_vzipqf32 (float32x4_t _a, float32x4_t _b)
+{
+  return vzipq_f32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32_t first[] = {1, 2, 3, 4};
+  float32_t second[] = {5, 6, 7, 8};
+  float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second));
+  float32x4_t res1 = result.val[0], res2 = result.val[1];
+  float32_t exp1[] = {1, 5, 2, 6};
+  float32_t exp2[] = {3, 7, 4, 8};
+  float32x4_t expected1 = vld1q_f32 (exp1);
+  float32x4_t expected2 = vld1q_f32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextu32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_u32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x
@@ -0,0 +1,227 @@
+extern void abort (void);
+
+poly8x16_t
+test_vextq_p8_1 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 1);
+}
+
+poly8x16_t
+test_vextq_p8_2 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 2);
+}
+
+poly8x16_t
+test_vextq_p8_3 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 3);
+}
+
+poly8x16_t
+test_vextq_p8_4 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 4);
+}
+
+poly8x16_t
+test_vextq_p8_5 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 5);
+}
+
+poly8x16_t
+test_vextq_p8_6 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 6);
+}
+
+poly8x16_t
+test_vextq_p8_7 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 7);
+}
+
+poly8x16_t
+test_vextq_p8_8 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 8);
+}
+
+poly8x16_t
+test_vextq_p8_9 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 9);
+}
+
+poly8x16_t
+test_vextq_p8_10 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 10);
+}
+
+poly8x16_t
+test_vextq_p8_11 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 11);
+}
+
+poly8x16_t
+test_vextq_p8_12 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 12);
+}
+
+poly8x16_t
+test_vextq_p8_13 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 13);
+}
+
+poly8x16_t
+test_vextq_p8_14 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 14);
+}
+
+poly8x16_t
+test_vextq_p8_15 (poly8x16_t a, poly8x16_t b)
+{
+  return vextq_p8 (a, b, 15);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  poly8x16_t in1 = vld1q_p8 (arr1);
+  poly8_t arr2[] =
+      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+  poly8x16_t in2 = vld1q_p8 (arr2);
+  poly8_t exp[16];
+  poly8x16_t expected;
+  poly8x16_t actual = test_vextq_p8_1 (in1, in2);
+
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 1;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_2 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 2;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_3 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 3;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_4 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 4;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_5 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 5;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_6 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 6;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_7 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 7;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_8 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 8;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_9 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 9;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_10 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 10;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_11 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 11;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_12 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 12;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_13 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 13;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_14 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 14;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p8_15 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 15;
+  expected = vld1q_p8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int8x16_t
+test_vrev64qs8 (int8x16_t _arg)
+{
+  return vrev64q_s8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  int8x16_t reversed = test_vrev64qs8 (inorder);
+  int8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev16_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev16p8.x"
+
+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqs32.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+int16x4x2_t
+test_vuzps16 (int16x4_t _a, int16x4_t _b)
+{
+  return vuzp_s16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16_t first[] = {1, 2, 3, 4};
+  int16_t second[] = {5, 6, 7, 8};
+  int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second));
+  int16_t exp1[] = {1, 3, 5, 7};
+  int16_t exp2[] = {2, 4, 6, 8};
+  int16x4_t expect1 = vld1_s16 (exp1);
+  int16x4_t expect2 = vld1_s16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+uint16x4x2_t
+test_vuzpu16 (uint16x4_t _a, uint16x4_t _b)
+{
+  return vuzp_u16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16_t first[] = {1, 2, 3, 4};
+  uint16_t second[] = {5, 6, 7, 8};
+  uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second));
+  uint16_t exp1[] = {1, 3, 5, 7};
+  uint16_t exp2[] = {2, 4, 6, 8};
+  uint16x4_t expect1 = vld1_u16 (exp1);
+  uint16x4_t expect2 = vld1_u16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnu8.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly8x8x2_t
+test_vtrnp8 (poly8x8_t _a, poly8x8_t _b)
+{
+  return vtrn_p8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  poly8x8x2_t result = test_vtrnp8 (vld1_p8 (first), vld1_p8 (second));
+  poly8x8_t res1 = result.val[0], res2 = result.val[1];
+  poly8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
+  poly8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
+  poly8x8_t expected1 = vld1_p8 (exp1);
+  poly8x8_t expected2 = vld1_p8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int16x8_t
+test_vrev32qs16 (int16x8_t _arg)
+{
+  return vrev32q_s16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  int16x8_t reversed = test_vrev32qs16 (inorder);
+  int16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64f32.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int8x8x2_t
+test_vzips8 (int8x8_t _a, int8x8_t _b)
+{
+  return vzip_s8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second));
+  int8x8_t res1 = result.val[0], res2 = result.val[1];
+  int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
+  int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
+  int8x8_t expected1 = vld1_s8 (exp1);
+  int8x8_t expected2 = vld1_s8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQs32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_s32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+float32x2x2_t
+test_vtrnf32 (float32x2_t _a, float32x2_t _b)
+{
+  return vtrn_f32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32_t first[] = {1, 2};
+  float32_t second[] = {3, 4};
+  float32x2x2_t result = test_vtrnf32 (vld1_f32 (first), vld1_f32 (second));
+  float32x2_t res1 = result.val[0], res2 = result.val[1];
+  float32_t exp1[] = {1, 3};
+  float32_t exp2[] = {2, 4};
+  float32x2_t expected1 = vld1_f32 (exp1);
+  float32x2_t expected2 = vld1_f32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64u8.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint16x8_t
+test_vrev32qu16 (uint16x8_t _arg)
+{
+  return vrev32q_u16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint16x8_t reversed = test_vrev32qu16 (inorder);
+  uint16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qu16.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vexts8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_s8.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint8x8_t
+test_vrev16u8 (uint8x8_t _arg)
+{
+  return vrev16_u8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint8x8_t reversed = test_vrev16u8 (inorder);
+  uint8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqs16.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x
@@ -0,0 +1,30 @@
+extern void abort (void);
+
+int64x2_t
+test_vextq_s64_1 (int64x2_t a, int64x2_t b)
+{
+  return vextq_s64 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int64_t arr1[] = {0, 1};
+  int64x2_t in1 = vld1q_s64 (arr1);
+  int64_t arr2[] = {2, 3};
+  int64x2_t in2 = vld1q_s64 (arr2);
+  int64_t exp[2];
+  int64x2_t expected;
+  int64x2_t actual = test_vextq_s64_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    exp[i] = i + 1;
+  expected = vld1q_s64 (exp);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly16x4x2_t
+test_vzipp16 (poly16x4_t _a, poly16x4_t _b)
+{
+  return vzip_p16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16_t first[] = {1, 2, 3, 4};
+  poly16_t second[] = {5, 6, 7, 8};
+  poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second));
+  poly16x4_t res1 = result.val[0], res2 = result.val[1];
+  poly16_t exp1[] = {1, 5, 2, 6};
+  poly16_t exp2[] = {3, 7, 4, 8};
+  poly16x4_t expected1 = vld1_p16 (exp1);
+  poly16x4_t expected2 = vld1_p16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x
@@ -0,0 +1,30 @@
+extern void abort (void);
+
+uint64x2_t
+test_vextq_u64_1 (uint64x2_t a, uint64x2_t b)
+{
+  return vextq_u64 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint64_t arr1[] = {0, 1};
+  uint64x2_t in1 = vld1q_u64 (arr1);
+  uint64_t arr2[] = {2, 3};
+  uint64x2_t in2 = vld1q_u64 (arr2);
+  uint64_t exp[2];
+  uint64x2_t expected;
+  uint64x2_t actual = test_vextq_u64_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    exp[i] = i + 1;
+  expected = vld1q_u64 (exp);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32q_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32qu8.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64u16.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x
@@ -0,0 +1,29 @@
+extern void abort (void);
+
+int8x16x2_t
+test_vzipqs8 (int8x16_t _a, int8x16_t _b)
+{
+  return vzipq_s8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  int8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  int8x16x2_t result = test_vzipqs8 (vld1q_s8 (first), vld1q_s8 (second));
+  int8x16_t res1 = result.val[0], res2 = result.val[1];
+  int8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
+  int8_t exp2[] =
+      {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
+  int8x16_t expected1 = vld1q_s8 (exp1);
+  int8x16_t expected2 = vld1q_s8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x
@@ -0,0 +1,28 @@
+extern void abort (void);
+
+uint8x16x2_t
+test_vtrnqu8 (uint8x16_t _a, uint8x16_t _b)
+{
+  return vtrnq_u8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  uint8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  uint8x16x2_t result = test_vtrnqu8 (vld1q_u8 (first), vld1q_u8 (second));
+  uint8x16_t res1 = result.val[0], res2 = result.val[1];
+  uint8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
+  uint8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
+  uint8x16_t expected1 = vld1q_u8 (exp1);
+  uint8x16_t expected2 = vld1q_u8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x
@@ -0,0 +1,30 @@
+extern void abort (void);
+
+int32x2_t
+test_vext_s32_1 (int32x2_t a, int32x2_t b)
+{
+  return vext_s32 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int32_t arr1[] = {0, 1};
+  int32x2_t in1 = vld1_s32 (arr1);
+  int32_t arr2[] = {2, 3};
+  int32x2_t in2 = vld1_s32 (arr2);
+  int32_t exp[2];
+  int32x2_t expected;
+  int32x2_t actual = test_vext_s32_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    exp[i] = i + 1;
+  expected = vld1_s32 (exp);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzps16.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x
@@ -0,0 +1,30 @@
+extern void abort (void);
+
+uint32x2_t
+test_vext_u32_1 (uint32x2_t a, uint32x2_t b)
+{
+  return vext_u32 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint32_t arr1[] = {0, 1};
+  uint32x2_t in1 = vld1_u32 (arr1);
+  uint32_t arr2[] = {2, 3};
+  uint32x2_t in2 = vld1_u32 (arr2);
+  uint32_t exp[2];
+  uint32x2_t expected;
+  uint32x2_t actual = test_vext_u32_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    exp[i] = i + 1;
+  expected = vld1_u32 (exp);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqs8.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x
@@ -0,0 +1,227 @@
+extern void abort (void);
+
+int8x16_t
+test_vextq_s8_1 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 1);
+}
+
+int8x16_t
+test_vextq_s8_2 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 2);
+}
+
+int8x16_t
+test_vextq_s8_3 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 3);
+}
+
+int8x16_t
+test_vextq_s8_4 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 4);
+}
+
+int8x16_t
+test_vextq_s8_5 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 5);
+}
+
+int8x16_t
+test_vextq_s8_6 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 6);
+}
+
+int8x16_t
+test_vextq_s8_7 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 7);
+}
+
+int8x16_t
+test_vextq_s8_8 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 8);
+}
+
+int8x16_t
+test_vextq_s8_9 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 9);
+}
+
+int8x16_t
+test_vextq_s8_10 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 10);
+}
+
+int8x16_t
+test_vextq_s8_11 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 11);
+}
+
+int8x16_t
+test_vextq_s8_12 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 12);
+}
+
+int8x16_t
+test_vextq_s8_13 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 13);
+}
+
+int8x16_t
+test_vextq_s8_14 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 14);
+}
+
+int8x16_t
+test_vextq_s8_15 (int8x16_t a, int8x16_t b)
+{
+  return vextq_s8 (a, b, 15);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  int8x16_t in1 = vld1q_s8 (arr1);
+  int8_t arr2[] =
+      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
+  int8x16_t in2 = vld1q_s8 (arr2);
+  int8_t exp[16];
+  int8x16_t expected;
+  int8x16_t actual = test_vextq_s8_1 (in1, in2);
+
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 1;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_2 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 2;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_3 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 3;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_4 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 4;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_5 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 5;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_6 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 6;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_7 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 7;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_8 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 8;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_9 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 9;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_10 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 10;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_11 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 11;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_12 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 12;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_13 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 13;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_14 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 14;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s8_15 (in1, in2);
+  for (i = 0; i < 16; i++)
+    exp[i] = i + 15;
+  expected = vld1q_s8 (exp);
+  for (i = 0; i < 16; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c
@@ -0,0 +1,36 @@
+/* Test the `vextq_f64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+extern void abort (void);
+#include <stdio.h>
+
+float64x2_t
+test_vextq_f64_1 (float64x2_t a, float64x2_t b)
+{
+  return vextq_f64 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  float64_t arr1[] = {0, 1};
+  float64x2_t in1 = vld1q_f64 (arr1);
+  float64_t arr2[] = {2, 3};
+  float64x2_t in2 = vld1q_f64 (arr2);
+  float64_t exp[] = {1, 2};
+  float64x2_t expected = vld1q_f64 (exp);
+  float64x2_t actual = test_vextq_f64_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c
@@ -0,0 +1,27 @@
+/* Test the vpaddd_f64 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+#define SIZE 6
+
+extern void abort (void);
+
+float64_t in[SIZE] = { -4.0, 4.0, -2.0, 2.0, -1.0, 1.0 };
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE / 2; ++i)
+    if (vpaddd_f64 (vld1q_f64 (in + 2 * i)) != 0.0)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler "faddp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32q_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32qs16.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqs16.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipf32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly8x8_t
+test_vrev16p8 (poly8x8_t _arg)
+{
+  return vrev16_p8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly8x8_t reversed = test_vrev16p8 (inorder);
+  poly8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev16_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev16u8.x"
+
+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextp8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_p8.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int8x8x2_t
+test_vtrns8 (int8x8_t _a, int8x8_t _b)
+{
+  return vtrn_s8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  int8x8x2_t result = test_vtrns8 (vld1_s8 (first), vld1_s8 (second));
+  int8x8_t res1 = result.val[0], res2 = result.val[1];
+  int8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
+  int8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
+  int8x8_t expected1 = vld1_s8 (exp1);
+  int8x8_t expected2 = vld1_s8 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int16x8x2_t
+test_vtrnqs16 (int16x8_t _a, int16x8_t _b)
+{
+  return vtrnq_s16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  int16x8x2_t result = test_vtrnqs16 (vld1q_s16 (first), vld1q_s16 (second));
+  int16x8_t res1 = result.val[0], res2 = result.val[1];
+  int16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
+  int16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
+  int16x8_t expected1 = vld1q_s16 (exp1);
+  int16x8_t expected2 = vld1q_s16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint16x8x2_t
+test_vtrnqu16 (uint16x8_t _a, uint16x8_t _b)
+{
+  return vtrnq_u16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  uint16x8x2_t result = test_vtrnqu16 (vld1q_u16 (first), vld1q_u16 (second));
+  uint16x8_t res1 = result.val[0], res2 = result.val[1];
+  uint16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
+  uint16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
+  uint16x8_t expected1 = vld1q_u16 (exp1);
+  uint16x8_t expected2 = vld1q_u16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x
@@ -0,0 +1,114 @@
+extern void abort (void);
+
+poly16x8_t
+test_vextq_p16_1 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 1);
+}
+
+poly16x8_t
+test_vextq_p16_2 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 2);
+}
+
+poly16x8_t
+test_vextq_p16_3 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 3);
+}
+
+poly16x8_t
+test_vextq_p16_4 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 4);
+}
+
+poly16x8_t
+test_vextq_p16_5 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 5);
+}
+
+poly16x8_t
+test_vextq_p16_6 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 6);
+}
+
+poly16x8_t
+test_vextq_p16_7 (poly16x8_t a, poly16x8_t b)
+{
+  return vextq_p16 (a, b, 7);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  poly16x8_t in1 = vld1q_p16 (arr1);
+  poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
+  poly16x8_t in2 = vld1q_p16 (arr2);
+  poly16_t exp[8];
+  poly16x8_t expected;
+  poly16x8_t actual = test_vextq_p16_1 (in1, in2);
+
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 1;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p16_2 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 2;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p16_3 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 3;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p16_4 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 4;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p16_5 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 5;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p16_6 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 6;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_p16_7 (in1, in2);
+  for (i = 0; i < 8; i++)
+    exp[i] = i + 7;
+  expected = vld1q_p16 (exp);
+  for (i = 0; i < 8; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int16x8_t
+test_vrev64qs16 (int16x8_t _arg)
+{
+  return vrev64q_s16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  int16x8_t reversed = test_vrev64qs16 (inorder);
+  int16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint16x8_t
+test_vrev64qu16 (uint16x8_t _arg)
+{
+  return vrev64q_u16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint16x8_t reversed = test_vrev64qu16 (inorder);
+  uint16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint8x8_t
+test_vrev64u8 (uint8x8_t _arg)
+{
+  return vrev64_u8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint8x8_t reversed = test_vrev64u8 (inorder);
+  uint8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+poly16x8x2_t
+test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b)
+{
+  return vuzpq_p16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second));
+  poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
+  poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
+  poly16x8_t expect1 = vld1q_p16 (exp1);
+  poly16x8_t expect2 = vld1q_p16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrns16.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+float32x2x2_t
+test_vuzpf32 (float32x2_t _a, float32x2_t _b)
+{
+  return vuzp_f32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32_t first[] = {1, 2};
+  float32_t second[] = {3, 4};
+  float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second));
+  float32_t exp1[] = {1, 3};
+  float32_t exp2[] = {2, 4};
+  float32x2_t expect1 = vld1_f32 (exp1);
+  float32x2_t expect2 = vld1_f32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipu16.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqf32.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqs8.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x
@@ -0,0 +1,28 @@
+extern void abort (void);
+
+poly8x16x2_t
+test_vtrnqp8 (poly8x16_t _a, poly8x16_t _b)
+{
+  return vtrnq_p8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  poly8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  poly8x16x2_t result = test_vtrnqp8 (vld1q_p8 (first), vld1q_p8 (second));
+  poly8x16_t res1 = result.val[0], res2 = result.val[1];
+  poly8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
+  poly8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
+  poly8x16_t expected1 = vld1q_p8 (exp1);
+  poly8x16_t expected2 = vld1q_p8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int32x2_t
+test_vrev64s32 (int32x2_t _arg)
+{
+  return vrev64_s32 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32x2_t inorder = {1, 2};
+  int32x2_t reversed = test_vrev64s32 (inorder);
+  int32x2_t expected = {2, 1};
+
+  for (i = 0; i < 2; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vexts16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_s16.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint8x16_t
+test_vrev32qu8 (uint8x16_t _arg)
+{
+  return vrev32q_u8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  uint8x16_t reversed = test_vrev32qu8 (inorder);
+  uint8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint32x2_t
+test_vrev64u32 (uint32x2_t _arg)
+{
+  return vrev64_u32 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32x2_t inorder = {1, 2};
+  uint32x2_t reversed = test_vrev64u32 (inorder);
+  uint32x2_t expected = {2, 1};
+
+  for (i = 0; i < 2; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQf32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_f32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint8x16_t
+test_vrev16qu8 (uint8x16_t _arg)
+{
+  return vrev16q_u8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  uint8x16_t reversed = test_vrev16qu8 (inorder);
+  uint8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqp8.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qp16.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly16x8x2_t
+test_vzipqp16 (poly16x8_t _a, poly16x8_t _b)
+{
+  return vzipq_p16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second));
+  poly16x8_t res1 = result.val[0], res2 = result.val[1];
+  poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
+  poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
+  poly16x8_t expected1 = vld1q_p16 (exp1);
+  poly16x8_t expected2 = vld1q_p16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqu16.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qu32.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint8x16x2_t
+test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b)
+{
+  return vuzpq_u8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  uint8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second));
+  uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
+  uint8x16_t expect1 = vld1q_u8 (exp1);
+  uint8x16_t expect2 = vld1q_u8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly8x8_t
+test_vrev64p8 (poly8x8_t _arg)
+{
+  return vrev64_p8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly8x8_t reversed = test_vrev64p8 (inorder);
+  poly8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint8x8_t
+test_vrev32u8 (uint8x8_t _arg)
+{
+  return vrev32_u8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  uint8x8_t reversed = test_vrev32u8 (inorder);
+  uint8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int8x8_t
+test_vrev16s8 (int8x8_t _arg)
+{
+  return vrev16_s8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  int8x8_t reversed = test_vrev16s8 (inorder);
+  int8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextu8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_u8.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQu16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_u16.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x
@@ -0,0 +1,68 @@
+/*  test_vcXXX wrappers for all the vcXXX (vector compare) and vtst intrinsics
+    in arm_neon.h (excluding the 64x1 variants as these generally produce scalar
+    not vector ops).  */
+#include "arm_neon.h"
+
+#define DONT_FORCE(X)
+
+#define FORCE_SIMD(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define OP1(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t	\
+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a)			\
+{								\
+  uint##SIZE##_t res;						\
+  FORCE (a);							\
+  res = v##OP##SUFFIX (a);					\
+  FORCE (res);							\
+  return res;							\
+}
+
+#define OP2(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t	\
+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a, BASETYPE##SIZE##_t b) \
+{								\
+  uint##SIZE##_t res;						\
+  FORCE (a);							\
+  FORCE (b);							\
+  res = v##OP##SUFFIX (a, b);					\
+  FORCE (res);							\
+  return res;							\
+}
+
+#define UNSIGNED_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
+OP2 (SIZE, tst, BASETYPE, SUFFIX, FORCE) \
+OP1 (SIZE, ceqz, BASETYPE, SUFFIX, FORCE) \
+OP2 (SIZE, ceq, BASETYPE, SUFFIX, FORCE) \
+OP2 (SIZE, cge, BASETYPE, SUFFIX, FORCE) \
+OP2 (SIZE, cgt, BASETYPE, SUFFIX, FORCE) \
+OP2 (SIZE, cle, BASETYPE, SUFFIX, FORCE) \
+OP2 (SIZE, clt, BASETYPE, SUFFIX, FORCE)
+
+#define ALL_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
+OP1 (SIZE, cgez, BASETYPE, SUFFIX, FORCE) \
+OP1 (SIZE, cgtz, BASETYPE, SUFFIX, FORCE) \
+OP1 (SIZE, clez, BASETYPE, SUFFIX, FORCE) \
+OP1 (SIZE, cltz, BASETYPE, SUFFIX, FORCE) \
+UNSIGNED_OPS (SIZE, BASETYPE, SUFFIX, FORCE)
+
+ALL_OPS (8x8, int, _s8, DONT_FORCE)
+ALL_OPS (16x4, int, _s16, DONT_FORCE)
+ALL_OPS (32x2, int, _s32, DONT_FORCE)
+ALL_OPS (64x1, int, _s64, DONT_FORCE)
+ALL_OPS (64, int, d_s64, FORCE_SIMD)
+ALL_OPS (8x16, int, q_s8, DONT_FORCE)
+ALL_OPS (16x8, int, q_s16, DONT_FORCE)
+ALL_OPS (32x4, int, q_s32, DONT_FORCE)
+ALL_OPS (64x2, int, q_s64, DONT_FORCE)
+UNSIGNED_OPS (8x8, uint, _u8, DONT_FORCE)
+UNSIGNED_OPS (16x4, uint, _u16, DONT_FORCE)
+UNSIGNED_OPS (32x2, uint, _u32, DONT_FORCE)
+UNSIGNED_OPS (64x1, uint, _u64, DONT_FORCE)
+UNSIGNED_OPS (64, uint, d_u64, FORCE_SIMD)
+UNSIGNED_OPS (8x16, uint, q_u8, DONT_FORCE)
+UNSIGNED_OPS (16x8, uint, q_u16, DONT_FORCE)
+UNSIGNED_OPS (32x4, uint, q_u32, DONT_FORCE)
+UNSIGNED_OPS (64x2, uint, q_u64, DONT_FORCE)
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqs32.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzps8.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqp8.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64p16.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32u16.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly8x16_t
+test_vrev32qp8 (poly8x16_t _arg)
+{
+  return vrev32q_p8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  poly8x16_t reversed = test_vrev32qp8 (inorder);
+  poly8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev16q_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev16qs8.x"
+
+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly16x4x2_t
+test_vtrnp16 (poly16x4_t _a, poly16x4_t _b)
+{
+  return vtrn_p16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16_t first[] = {1, 2, 3, 4};
+  poly16_t second[] = {5, 6, 7, 8};
+  poly16x4x2_t result = test_vtrnp16 (vld1_p16 (first), vld1_p16 (second));
+  poly16x4_t res1 = result.val[0], res2 = result.val[1];
+  poly16_t exp1[] = {1, 5, 3, 7};
+  poly16_t exp2[] = {2, 6, 4, 8};
+  poly16x4_t expected1 = vld1_p16 (exp1);
+  poly16x4_t expected2 = vld1_p16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int32x2x2_t
+test_vzips32 (int32x2_t _a, int32x2_t _b)
+{
+  return vzip_s32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32_t first[] = {1, 2};
+  int32_t second[] = {3, 4};
+  int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second));
+  int32x2_t res1 = result.val[0], res2 = result.val[1];
+  int32_t exp1[] = {1, 3};
+  int32_t exp2[] = {2, 4};
+  int32x2_t expected1 = vld1_s32 (exp1);
+  int32x2_t expected2 = vld1_s32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64u32.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly8x16_t
+test_vrev16qp8 (poly8x16_t _arg)
+{
+  return vrev16q_p8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  poly8x16_t reversed = test_vrev16qp8 (inorder);
+  poly8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint32x2x2_t
+test_vzipu32 (uint32x2_t _a, uint32x2_t _b)
+{
+  return vzip_u32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32_t first[] = {1, 2};
+  uint32_t second[] = {3, 4};
+  uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second));
+  uint32x2_t res1 = result.val[0], res2 = result.val[1];
+  uint32_t exp1[] = {1, 3};
+  uint32_t exp2[] = {2, 4};
+  uint32x2_t expected1 = vld1_u32 (exp1);
+  uint32x2_t expected2 = vld1_u32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+float32x4x2_t
+test_vtrnqf32 (float32x4_t _a, float32x4_t _b)
+{
+  return vtrnq_f32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32_t first[] = {1, 2, 3, 4};
+  float32_t second[] = {5, 6, 7, 8};
+  float32x4x2_t result = test_vtrnqf32 (vld1q_f32 (first), vld1q_f32 (second));
+  float32x4_t res1 = result.val[0], res2 = result.val[1];
+  float32_t exp1[] = {1, 5, 3, 7};
+  float32_t exp2[] = {2, 6, 4, 8};
+  float32x4_t expected1 = vld1q_f32 (exp1);
+  float32x4_t expected2 = vld1q_f32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x
@@ -0,0 +1,28 @@
+extern void abort (void);
+
+int8x16x2_t
+test_vtrnqs8 (int8x16_t _a, int8x16_t _b)
+{
+  return vtrnq_s8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  int8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  int8x16x2_t result = test_vtrnqs8 (vld1q_s8 (first), vld1q_s8 (second));
+  int8x16_t res1 = result.val[0], res2 = result.val[1];
+  int8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
+  int8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
+  int8x16_t expected1 = vld1q_s8 (exp1);
+  int8x16_t expected2 = vld1q_s8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c
@@ -0,0 +1,11 @@
+/* Test the `vexts64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_s64.x"
+
+/* Do not scan-assembler.  An EXT instruction could be emitted, but would merely
+   return its first argument, so it is legitimate to optimize it out.  */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzps32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+float32x4_t
+test_vrev64qf32 (float32x4_t _arg)
+{
+  return vrev64q_f32 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32x4_t inorder = {1, 2, 3, 4};
+  float32x4_t reversed = test_vrev64qf32 (inorder);
+  float32x4_t expected = {2, 1, 4, 3};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x
@@ -0,0 +1,58 @@
+extern void abort (void);
+
+int16x4_t
+test_vext_s16_1 (int16x4_t a, int16x4_t b)
+{
+  return vext_s16 (a, b, 1);
+}
+
+int16x4_t
+test_vext_s16_2 (int16x4_t a, int16x4_t b)
+{
+  return vext_s16 (a, b, 2);
+}
+
+int16x4_t
+test_vext_s16_3 (int16x4_t a, int16x4_t b)
+{
+  return vext_s16 (a, b, 3);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int16_t arr1[] = {0, 1, 2, 3};
+  int16x4_t in1 = vld1_s16 (arr1);
+  int16_t arr2[] = {4, 5, 6, 7};
+  int16x4_t in2 = vld1_s16 (arr2);
+  int16_t exp[4];
+  int16x4_t expected;
+  int16x4_t actual = test_vext_s16_1 (in1, in2);
+
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 1;
+  expected = vld1_s16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s16_2 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 2;
+  expected = vld1_s16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_s16_3 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 3;
+  expected = vld1_s16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x
@@ -0,0 +1,58 @@
+extern void abort (void);
+
+uint16x4_t
+test_vext_u16_1 (uint16x4_t a, uint16x4_t b)
+{
+  return vext_u16 (a, b, 1);
+}
+
+uint16x4_t
+test_vext_u16_2 (uint16x4_t a, uint16x4_t b)
+{
+  return vext_u16 (a, b, 2);
+}
+
+uint16x4_t
+test_vext_u16_3 (uint16x4_t a, uint16x4_t b)
+{
+  return vext_u16 (a, b, 3);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint16_t arr1[] = {0, 1, 2, 3};
+  uint16x4_t in1 = vld1_u16 (arr1);
+  uint16_t arr2[] = {4, 5, 6, 7};
+  uint16x4_t in2 = vld1_u16 (arr2);
+  uint16_t exp[4];
+  uint16x4_t expected;
+  uint16x4_t actual = test_vext_u16_1 (in1, in2);
+
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 1;
+  expected = vld1_u16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u16_2 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 2;
+  expected = vld1_u16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vext_u16_3 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 3;
+  expected = vld1_u16 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqs32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly8x16x2_t
+test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b)
+{
+  return vuzpq_p8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  poly8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second));
+  poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
+  poly8x16_t expect1 = vld1q_p8 (exp1);
+  poly8x16_t expect2 = vld1q_p8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqu8.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzips8.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly8x8_t
+test_vrev32p8 (poly8x8_t _arg)
+{
+  return vrev32_p8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly8x8_t reversed = test_vrev32p8 (inorder);
+  poly8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int8x8_t
+test_vrev64s8 (int8x8_t _arg)
+{
+  return vrev64_s8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  int8x8_t reversed = test_vrev64s8 (inorder);
+  int8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpp8.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x
@@ -0,0 +1,58 @@
+extern void abort (void);
+
+int32x4_t
+test_vextq_s32_1 (int32x4_t a, int32x4_t b)
+{
+  return vextq_s32 (a, b, 1);
+}
+
+int32x4_t
+test_vextq_s32_2 (int32x4_t a, int32x4_t b)
+{
+  return vextq_s32 (a, b, 2);
+}
+
+int32x4_t
+test_vextq_s32_3 (int32x4_t a, int32x4_t b)
+{
+  return vextq_s32 (a, b, 3);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  int32_t arr1[] = {0, 1, 2, 3};
+  int32x4_t in1 = vld1q_s32 (arr1);
+  int32_t arr2[] = {4, 5, 6, 7};
+  int32x4_t in2 = vld1q_s32 (arr2);
+  int32_t exp[4];
+  int32x4_t expected;
+  int32x4_t actual = test_vextq_s32_1 (in1, in2);
+
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 1;
+  expected = vld1q_s32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s32_2 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 2;
+  expected = vld1q_s32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_s32_3 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 3;
+  expected = vld1q_s32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x
@@ -0,0 +1,58 @@
+extern void abort (void);
+
+uint32x4_t
+test_vextq_u32_1 (uint32x4_t a, uint32x4_t b)
+{
+  return vextq_u32 (a, b, 1);
+}
+
+uint32x4_t
+test_vextq_u32_2 (uint32x4_t a, uint32x4_t b)
+{
+  return vextq_u32 (a, b, 2);
+}
+
+uint32x4_t
+test_vextq_u32_3 (uint32x4_t a, uint32x4_t b)
+{
+  return vextq_u32 (a, b, 3);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  uint32_t arr1[] = {0, 1, 2, 3};
+  uint32x4_t in1 = vld1q_u32 (arr1);
+  uint32_t arr2[] = {4, 5, 6, 7};
+  uint32x4_t in2 = vld1q_u32 (arr2);
+  uint32_t exp[4];
+  uint32x4_t expected;
+  uint32x4_t actual = test_vextq_u32_1 (in1, in2);
+
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 1;
+  expected = vld1q_u32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u32_2 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 2;
+  expected = vld1q_u32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  actual = test_vextq_u32_3 (in1, in2);
+  for (i = 0; i < 4; i++)
+    exp[i] = i + 3;
+  expected = vld1q_u32 (exp);
+  for (i = 0; i < 4; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQu64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_u64.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipp16.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_s32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrns32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev16q_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev16qp8.x"
+
+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+int32x4x2_t
+test_vuzpqs32 (int32x4_t _a, int32x4_t _b)
+{
+  return vuzpq_s32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32_t first[] = {1, 2, 3, 4};
+  int32_t second[] = {5, 6, 7, 8};
+  int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second));
+  int32_t exp1[] = {1, 3, 5, 7};
+  int32_t exp2[] = {2, 4, 6, 8};
+  int32x4_t expect1 = vld1q_s32 (exp1);
+  int32x4_t expect2 = vld1q_s32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipu32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly16x4_t
+test_vrev32p16 (poly16x4_t _arg)
+{
+  return vrev32_p16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16x4_t inorder = {1, 2, 3, 4};
+  poly16x4_t reversed = test_vrev32p16 (inorder);
+  poly16x4_t expected = {2, 1, 4, 3};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+uint32x4x2_t
+test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b)
+{
+  return vuzpq_u32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32_t first[] = {1, 2, 3, 4};
+  uint32_t second[] = {5, 6, 7, 8};
+  uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second));
+  uint32_t exp1[] = {1, 3, 5, 7};
+  uint32_t exp2[] = {2, 4, 6, 8};
+  uint32x4_t expect1 = vld1q_u32 (exp1);
+  uint32x4_t expect2 = vld1q_u32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+uint64_t in1 = 0x0123456789abcdefULL;
+uint64_t expected1 = 0x80c4a2e691d5b3f7ULL;
+
+#define TEST8(BASETYPE, SUFFIX)						\
+void test8_##SUFFIX ()							\
+{									\
+  BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1));	\
+  uint64_t res = vget_lane_u64 (vreinterpret_u64_##SUFFIX (out), 0);	\
+  if (res != expected1) abort ();					\
+}
+
+uint64_t in2 = 0xdeadbeefcafebabeULL;
+uint64_t expected2 = 0x7bb57df7537f5d7dULL;
+
+#define TEST16(BASETYPE, SUFFIX)					\
+void test16_##SUFFIX ()							\
+{									\
+  BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1),	\
+					   vcreate_##SUFFIX (in2));	\
+  uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in));	\
+  uint64_t res1 = vgetq_lane_u64 (res, 0);				\
+  uint64_t res2 = vgetq_lane_u64 (res, 1);				\
+  if (res1 != expected1 || res2 != expected2) abort ();			\
+}
+
+TEST8 (poly, p8);
+TEST8 (int, s8);
+TEST8 (uint, u8);
+
+TEST16 (poly, p8);
+TEST16 (int, s8);
+TEST16 (uint, u8);
+
+int
+main (int argc, char **argv)
+{
+  test8_p8 ();
+  test8_s8 ();
+  test8_u8 ();
+  test16_p8 ();
+  test16_s8 ();
+  test16_u8 ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */
+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vexts32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_s32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqu8.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int8x16_t
+test_vrev32qs8 (int8x16_t _arg)
+{
+  return vrev32q_s8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  int8x16_t reversed = test_vrev32qs8 (inorder);
+  int8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int8x16_t
+test_vrev16qs8 (int8x16_t _arg)
+{
+  return vrev16q_s8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  int8x16_t reversed = test_vrev16qs8 (inorder);
+  int8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
+
+  for (i = 0; i < 16; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int16x4_t
+test_vrev64s16 (int16x4_t _arg)
+{
+  return vrev64_s16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16x4_t inorder = {1, 2, 3, 4};
+  int16x4_t reversed = test_vrev64s16 (inorder);
+  int16x4_t expected = {4, 3, 2, 1};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQs8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_s8.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+uint16x4_t
+test_vrev64u16 (uint16x4_t _arg)
+{
+  return vrev64_u16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16x4_t inorder = {1, 2, 3, 4};
+  uint16x4_t reversed = test_vrev64u16 (inorder);
+  uint16x4_t expected = {4, 3, 2, 1};
+
+  for (i = 0; i < 4; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
@@ -0,0 +1,26 @@
+extern void abort (void);
+
+poly16x4x2_t
+test_vuzpp16 (poly16x4_t _a, poly16x4_t _b)
+{
+  return vuzp_p16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16_t first[] = {1, 2, 3, 4};
+  poly16_t second[] = {5, 6, 7, 8};
+  poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second));
+  poly16_t exp1[] = {1, 3, 5, 7};
+  poly16_t exp2[] = {2, 4, 6, 8};
+  poly16x4_t expect1 = vld1_p16 (exp1);
+  poly16x4_t expect2 = vld1_p16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqf32.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_p8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipp8.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqp16.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly16x8_t
+test_vrev32qp16 (poly16x8_t _arg)
+{
+  return vrev32q_p16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly16x8_t reversed = test_vrev32qp16 (inorder);
+  poly16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrnq_u32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnqu32.x"
+
+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int8x16x2_t
+test_vuzpqs8 (int8x16_t _a, int8x16_t _b)
+{
+  return vuzpq_s8 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  int8_t second[] =
+      {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
+  int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second));
+  int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
+  int8x16_t expect1 = vld1q_s8 (exp1);
+  int8x16_t expect2 = vld1q_s8 (exp2);
+
+  for (i = 0; i < 16; i++)
+    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int32x4x2_t
+test_vzipqs32 (int32x4_t _a, int32x4_t _b)
+{
+  return vzipq_s32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32_t first[] = {1, 2, 3, 4};
+  int32_t second[] = {5, 6, 7, 8};
+  int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second));
+  int32x4_t res1 = result.val[0], res2 = result.val[1];
+  int32_t exp1[] = {1, 5, 2, 6};
+  int32_t exp2[] = {3, 7, 4, 8};
+  int32x4_t expected1 = vld1q_s32 (exp1);
+  int32x4_t expected2 = vld1q_s32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qs16.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+int8x8_t
+test_vrev32s8 (int8x8_t _arg)
+{
+  return vrev32_s8 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  int8x8_t reversed = test_vrev32s8 (inorder);
+  int8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQp16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_p16.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint32x4x2_t
+test_vzipqu32 (uint32x4_t _a, uint32x4_t _b)
+{
+  return vzipq_u32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32_t first[] = {1, 2, 3, 4};
+  uint32_t second[] = {5, 6, 7, 8};
+  uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second));
+  uint32x4_t res1 = result.val[0], res2 = result.val[1];
+  uint32_t exp1[] = {1, 5, 2, 6};
+  uint32_t exp2[] = {3, 7, 4, 8};
+  uint32x4_t expected1 = vld1q_u32 (exp1);
+  uint32x4_t expected2 = vld1q_u32 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQu32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_u32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32_p16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32p16.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
@@ -0,0 +1,30 @@
+extern void abort (void);
+
+float32x2_t
+test_vext_f32_1 (float32x2_t a, float32x2_t b)
+{
+  return vext_f32 (a, b, 1);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  float32_t arr1[] = {0, 1};
+  float32x2_t in1 = vld1_f32 (arr1);
+  float32_t arr2[] = {2, 3};
+  float32x2_t in2 = vld1_f32 (arr2);
+  float32_t exp[2];
+  float32x2_t expected;
+  float32x2_t actual = test_vext_f32_1 (in1, in2);
+
+  for (i = 0; i < 2; i++)
+    exp[i] = i + 1;
+  expected = vld1_f32 (exp);
+  for (i = 0; i < 2; i++)
+    if (actual[i] != expected[i])
+      abort ();
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
@@ -0,0 +1,25 @@
+/* Test the `vextf64' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  int i, off;
+  float64x1_t in1 = {0};
+  float64x1_t in2 = {1};
+  float64x1_t actual = vext_f64 (in1, in2, 0);
+  if (actual != in1)
+    abort ();
+
+  return 0;
+}
+
+/* Do not scan-assembler.  An EXT instruction could be emitted, but would merely
+   return its first argument, so it is legitimate to optimize it out.  */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpf32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzpq_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpqu16.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpu8.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqf32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vqdmullh_laneq_s16.c
@@ -0,0 +1,15 @@
+/* Test the vqdmullh_laneq_s16 AArch64 SIMD intrinsic.  */
+
+/* { dg-do compile } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+
+int32_t
+t_vqdmullh_laneq_s16 (int16_t a, int16x8_t b)
+{
+  return vqdmullh_laneq_s16 (a, b, 0);
+}
+
+/* { dg-final { scan-assembler-times "sqdmull\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64_s16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64s16.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int32x2x2_t
+test_vtrns32 (int32x2_t _a, int32x2_t _b)
+{
+  return vtrn_s32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int32_t first[] = {1, 2};
+  int32_t second[] = {3, 4};
+  int32x2x2_t result = test_vtrns32 (vld1_s32 (first), vld1_s32 (second));
+  int32x2_t res1 = result.val[0], res2 = result.val[1];
+  int32_t exp1[] = {1, 3};
+  int32_t exp2[] = {2, 4};
+  int32x2_t expected1 = vld1_s32 (exp1);
+  int32x2_t expected2 = vld1_s32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev16q_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev16qu8.x"
+
+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+int16x4x2_t
+test_vzips16 (int16x4_t _a, int16x4_t _b)
+{
+  return vzip_s16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int16_t first[] = {1, 2, 3, 4};
+  int16_t second[] = {5, 6, 7, 8};
+  int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second));
+  int16x4_t res1 = result.val[0], res2 = result.val[1];
+  int16_t exp1[] = {1, 5, 2, 6};
+  int16_t exp2[] = {3, 7, 4, 8};
+  int16x4_t expected1 = vld1_s16 (exp1);
+  int16x4_t expected2 = vld1_s16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev64q_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev64qs8.x"
+
+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextQp8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "extq_p8.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint32x2x2_t
+test_vtrnu32 (uint32x2_t _a, uint32x2_t _b)
+{
+  return vtrn_u32 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint32_t first[] = {1, 2};
+  uint32_t second[] = {3, 4};
+  uint32x2x2_t result = test_vtrnu32 (vld1_u32 (first), vld1_u32 (second));
+  uint32x2_t res1 = result.val[0], res2 = result.val[1];
+  uint32_t exp1[] = {1, 3};
+  uint32_t exp2[] = {2, 4};
+  uint32x2_t expected1 = vld1_u32 (exp1);
+  uint32x2_t expected2 = vld1_u32 (exp2);
+
+  for (i = 0; i < 2; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+uint16x4x2_t
+test_vzipu16 (uint16x4_t _a, uint16x4_t _b)
+{
+  return vzip_u16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  uint16_t first[] = {1, 2, 3, 4};
+  uint16_t second[] = {5, 6, 7, 8};
+  uint16x4x2_t result = test_vzipu16 (vld1_u16 (first), vld1_u16 (second));
+  uint16x4_t res1 = result.val[0], res2 = result.val[1];
+  uint16_t exp1[] = {1, 5, 2, 6};
+  uint16_t exp2[] = {3, 7, 4, 8};
+  uint16x4_t expected1 = vld1_u16 (exp1);
+  uint16x4_t expected2 = vld1_u16 (exp2);
+
+  for (i = 0; i < 4; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vuzp_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vuzpu16.x"
+
+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32_s8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32s8.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c
@@ -0,0 +1,10 @@
+/* Test the `vrev32q_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vrev32qu16.x"
+
+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c
@@ -0,0 +1,11 @@
+/* Test the `vtrn_f32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vtrnf32.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzipq_u16' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipqu16.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c
@@ -0,0 +1,11 @@
+/* Test the `vzip_u8' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+#include "vzipu8.x"
+
+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x
@@ -0,0 +1,27 @@
+extern void abort (void);
+
+poly16x8x2_t
+test_vtrnqp16 (poly16x8_t _a, poly16x8_t _b)
+{
+  return vtrnq_p16 (_a, _b);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
+  poly16x8x2_t result = test_vtrnqp16 (vld1q_p16 (first), vld1q_p16 (second));
+  poly16x8_t res1 = result.val[0], res2 = result.val[1];
+  poly16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
+  poly16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
+  poly16x8_t expected1 = vld1q_p16 (exp1);
+  poly16x8_t expected2 = vld1q_p16 (exp2);
+
+  for (i = 0; i < 8; i++)
+    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
+      abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-inline" } */
+
+/* Scan-assembler test, so, incorporate as little other code as possible.  */
+
+#include "arm_neon.h"
+#include "int_comparisons.x"
+
+/* Operations on all 18 integer types:  (q?)_[su](8|16|32|64), d_[su]64.
+   (d?)_[us]64 generate regs of form 'd0' rather than e.g. 'v0.2d'.  */
+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
+
+/* vcge + vcle both implemented with cmge (signed) or cmhs (unsigned).  */
+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
+
+/* vcgt + vclt both implemented with cmgt (signed) or cmhi (unsigned).  */
+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
+
+/* Comparisons against immediate zero, on the 8 signed integer types only.  */
+
+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
+/*  For int64_t and int64x1_t, combine_simplify_rtx failure of
+    https://gcc.gnu.org/ml/gcc/2014-06/msg00253.html
+    prevents generation of cmge....#0, instead producing mvn + sshr.  */
+/* { #dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
+/* { dg-final { scan-assembler-times "\[ \t\]cmlt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
+/* For int64_t and int64x1_t, cmlt ... #0 and sshr ... #63 are equivalent,
+   so allow either.  cmgez issue above results in extra 2 * sshr....63.  */
+/* { dg-final { scan-assembler-times "\[ \t\](?:cmlt|sshr)\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?(?:0|63)" 4 } } */
+
+// All should have been compiled into single insns without inverting result:
+/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+poly16x8_t
+test_vrev64qp16 (poly16x8_t _arg)
+{
+  return vrev64q_p16 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
+  poly16x8_t reversed = test_vrev64qp16 (inorder);
+  poly16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
+
+  for (i = 0; i < 8; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
@@ -0,0 +1,10 @@
+/* Test the `vextf32' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3 -fno-inline" } */
+
+#include "arm_neon.h"
+#include "ext_f32.x"
+
+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x
@@ -0,0 +1,22 @@
+extern void abort (void);
+
+float32x2_t
+test_vrev64f32 (float32x2_t _arg)
+{
+  return vrev64_f32 (_arg);
+}
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  float32x2_t inorder = {1, 2};
+  float32x2_t reversed = test_vrev64f32 (inorder);
+  float32x2_t expected = {2, 1};
+
+  for (i = 0; i < 2; i++)
+    if (reversed[i] != expected[i])
+      abort ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,430 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps -O1" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_0 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_1 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i;
+  float32_t c[2] = { 0.0 , 3.14 };
+  float32_t d[2];
+
+  a = vld1_f32 (c);
+  b = wrap_vdup_lane_f32_0 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_f32_1 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_0 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 0);
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_1 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i;
+  float32_t c[2] = { 0.0 , 3.14 };
+  float32_t d[4];
+
+  a = vld1_f32 (c);
+  b = wrap_vdupq_lane_f32_0 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_f32_1 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_0 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 0);
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_1 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i;
+  /* Only two first cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+
+  a = vld1_s8 (c);
+  b = wrap_vdup_lane_s8_0 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s8_1 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_0 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 0);
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_1 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i;
+  /* Only two first cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+
+  a = vld1_s8 (c);
+  b = wrap_vdupq_lane_s8_0 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s8_1 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_0 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 0);
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_1 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i;
+  /* Only two first cases are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+
+  a = vld1_s16 (c);
+  b = wrap_vdup_lane_s16_0 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s16_1 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_0 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 0);
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_1 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s16 ()
+{
+  int16x4_t a;
+  int16x8_t b;
+  int i;
+  /* Only two first cases are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[8];
+
+  a = vld1_s16 (c);
+  b = wrap_vdupq_lane_s16_0 (a);
+  vst1q_s16 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s16_1 (a);
+  vst1q_s16 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_s32_0 (int32x2_t a)
+{
+  return vdup_lane_s32 (a, 0);
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_s32_1 (int32x2_t a)
+{
+  return vdup_lane_s32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s32 ()
+{
+  int32x2_t a;
+  int32x2_t b;
+  int i;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[2];
+
+  a = vld1_s32 (c);
+  b = wrap_vdup_lane_s32_0 (a);
+  vst1_s32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s32_1 (a);
+  vst1_s32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_s32_0 (int32x2_t a)
+{
+  return vdupq_lane_s32 (a, 0);
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_s32_1 (int32x2_t a)
+{
+  return vdupq_lane_s32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s32 ()
+{
+  int32x2_t a;
+  int32x4_t b;
+  int i;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[4];
+
+  a = vld1_s32 (c);
+  b = wrap_vdupq_lane_s32_0 (a);
+  vst1q_s32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s32_1 (a);
+  vst1q_s32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_lane_s64_0 (int64x1_t a)
+{
+  return vdup_lane_s64 (a, 0);
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_lane_s64_1 (int64x1_t a)
+{
+  return vdup_lane_s64 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s64 ()
+{
+  int64x1_t a;
+  int64x1_t b;
+  int64_t c[1];
+  int64_t d[1];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  b = wrap_vdup_lane_s64_0 (a);
+  vst1_s64 (d, b);
+  if (c[0] != d[0])
+    return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  b = wrap_vdup_lane_s64_1 (a);
+  vst1_s64 (d, b);
+  if (c[0] != d[0])
+    return 1;
+  return 0;
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_lane_s64_0 (int64x1_t a)
+{
+  return vdupq_lane_s64 (a, 0);
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_lane_s64_1 (int64x1_t a)
+{
+  return vdupq_lane_s64 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s64 ()
+{
+  int64x1_t a;
+  int64x2_t b;
+  int i;
+  int64_t c[1];
+  int64_t d[2];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  b = wrap_vdupq_lane_s64_0 (a);
+  vst1q_s64 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  b = wrap_vdupq_lane_s64_1 (a);
+  vst1q_s64 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+
+  if (test_vdup_lane_f32 ())
+    abort ();
+  if (test_vdup_lane_s8 ())
+    abort ();
+  if (test_vdup_lane_s16 ())
+    abort ();
+  if (test_vdup_lane_s32 ())
+    abort ();
+  if (test_vdup_lane_s64 ())
+    abort ();
+  if (test_vdupq_lane_f32 ())
+    abort ();
+  if (test_vdupq_lane_s8 ())
+    abort ();
+  if (test_vdupq_lane_s16 ())
+    abort ();
+  if (test_vdupq_lane_s32 ())
+    abort ();
+  if (test_vdupq_lane_s64 ())
+    abort ();
+
+  return 0;
+}
+
+/* Asm check for test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Asm check for test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Asm check for test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
@@ -0,0 +1,19 @@
+/* Verify:
+     * with outgoing.
+     * total frame size > 512.
+       area except outgoing <= 512
+     * number of callee-save reg >= 2.
+     * split the stack adjustment into two substractions,
+       the first could be optimized into "stp !".  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test15, 480, , 8, a[8])
+t_frame_run (test15)
+
+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/bics_3.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/bics_3.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+bics_si_test (int a, int b)
+{
+  if (a & ~b)
+    return 1;
+  else
+    return 0;
+}
+
+int __attribute__ ((noinline))
+bics_si_test2 (int a, int b)
+{
+  if (a & ~ (b << 2))
+    return 1;
+  else
+    return 0;
+}
+
+typedef long long s64;
+
+int __attribute__ ((noinline))
+bics_di_test (s64 a, s64 b)
+{
+  if (a & ~b)
+    return 1;
+  else
+    return 0;
+}
+
+int __attribute__ ((noinline))
+bics_di_test2 (s64 a, s64 b)
+{
+  if (a & ~(b << 2))
+    return 1;
+  else
+    return 0;
+}
+
+int
+main (void)
+{
+  int a = 5;
+  int b = 5;
+  int c = 20;
+  s64 d = 5;
+  s64 e = 5;
+  s64 f = 20;
+  if (bics_si_test (a, b))
+    abort ();
+  if (bics_si_test2 (c, b))
+    abort ();
+  if (bics_di_test (d, e))
+    abort ();
+  if (bics_di_test2 (f, e))
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+, lsl 2" 1 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+, lsl 2" 1 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
@@ -0,0 +1,17 @@
+/* Test if a BSL-like instruction can be generated from a C idiom.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to BIF.  */
+
+uint32x4_t
+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask)
+{
+  return (mask & a) | (~mask & b);
+}
+
+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
+
--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
@@ -0,0 +1,619 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_n_f32 (float32_t a)
+{
+  return vdup_n_f32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_f32 ()
+{
+  float32_t a = 1.0;
+  float32x2_t b;
+  float32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_f32 (a);
+  vst1_f32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_f32 (float32_t a)
+{
+  return vdupq_n_f32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_f32 ()
+{
+  float32_t a = 1.0;
+  float32x4_t b;
+  float32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_f32 (a);
+  vst1q_f32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float64x1_t __attribute__ ((noinline))
+wrap_vdup_n_f64 (float64_t a)
+{
+  return vdup_n_f64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_f64 ()
+{
+  float64_t a = 1.0;
+  float64x1_t b;
+  float64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_f64 (a);
+  vst1_f64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_f64 (float64_t a)
+{
+  return vdupq_n_f64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_f64 ()
+{
+  float64_t a = 1.0;
+  float64x2_t b;
+  float64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_f64 (a);
+  vst1q_f64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly8x8_t __attribute__ ((noinline))
+wrap_vdup_n_p8 (poly8_t a)
+{
+  return vdup_n_p8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p8 ()
+{
+  poly8_t a = 1;
+  poly8x8_t b;
+  poly8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_p8 (a);
+  vst1_p8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_p8 (poly8_t a)
+{
+  return vdupq_n_p8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_p8 ()
+{
+  poly8_t a = 1;
+  poly8x16_t b;
+  poly8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_p8 (a);
+  vst1q_p8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_n_s8 (int8_t a)
+{
+  return vdup_n_s8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s8 ()
+{
+  int8_t a = 1;
+  int8x8_t b;
+  int8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_s8 (a);
+  vst1_s8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_s8 (int8_t a)
+{
+  return vdupq_n_s8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s8 ()
+{
+  int8_t a = 1;
+  int8x16_t b;
+  int8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_s8 (a);
+  vst1q_s8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint8x8_t __attribute__ ((noinline))
+wrap_vdup_n_u8 (uint8_t a)
+{
+  return vdup_n_u8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u8 ()
+{
+  uint8_t a = 1;
+  uint8x8_t b;
+  uint8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_u8 (a);
+  vst1_u8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_u8 (uint8_t a)
+{
+  return vdupq_n_u8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u8 ()
+{
+  uint8_t a = 1;
+  uint8x16_t b;
+  uint8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_u8 (a);
+  vst1q_u8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly16x4_t __attribute__ ((noinline))
+wrap_vdup_n_p16 (poly16_t a)
+{
+  return vdup_n_p16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p16 ()
+{
+  poly16_t a = 1;
+  poly16x4_t b;
+  poly16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_p16 (a);
+  vst1_p16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_p16 (poly16_t a)
+{
+  return vdupq_n_p16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_p16 ()
+{
+  poly16_t a = 1;
+  poly16x8_t b;
+  poly16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_p16 (a);
+  vst1q_p16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_n_s16 (int16_t a)
+{
+  return vdup_n_s16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s16 ()
+{
+  int16_t a = 1;
+  int16x4_t b;
+  int16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_s16 (a);
+  vst1_s16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_s16 (int16_t a)
+{
+  return vdupq_n_s16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s16 ()
+{
+  int16_t a = 1;
+  int16x8_t b;
+  int16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_s16 (a);
+  vst1q_s16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint16x4_t __attribute__ ((noinline))
+wrap_vdup_n_u16 (uint16_t a)
+{
+  return vdup_n_u16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u16 ()
+{
+  uint16_t a = 1;
+  uint16x4_t b;
+  uint16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_u16 (a);
+  vst1_u16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_u16 (uint16_t a)
+{
+  return vdupq_n_u16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u16 ()
+{
+  uint16_t a = 1;
+  uint16x8_t b;
+  uint16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_u16 (a);
+  vst1q_u16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_n_s32 (int32_t a)
+{
+  return vdup_n_s32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s32 ()
+{
+  int32_t a = 1;
+  int32x2_t b;
+  int32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_s32 (a);
+  vst1_s32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_s32 (int32_t a)
+{
+  return vdupq_n_s32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s32 ()
+{
+  int32_t a = 1;
+  int32x4_t b;
+  int32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_s32 (a);
+  vst1q_s32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint32x2_t __attribute__ ((noinline))
+wrap_vdup_n_u32 (uint32_t a)
+{
+  return vdup_n_u32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u32 ()
+{
+  uint32_t a = 1;
+  uint32x2_t b;
+  uint32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_u32 (a);
+  vst1_u32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_u32 (uint32_t a)
+{
+  return vdupq_n_u32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u32 ()
+{
+  uint32_t a = 1;
+  uint32x4_t b;
+  uint32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_u32 (a);
+  vst1q_u32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_n_s64 (int64_t a)
+{
+  return vdup_n_s64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s64 ()
+{
+  int64_t a = 1;
+  int64x1_t b;
+  int64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_s64 (a);
+  vst1_s64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_s64 (int64_t a)
+{
+  return vdupq_n_s64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s64 ()
+{
+  int64_t a = 1;
+  int64x2_t b;
+  int64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_s64 (a);
+  vst1q_s64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint64x1_t __attribute__ ((noinline))
+wrap_vdup_n_u64 (uint64_t a)
+{
+  return vdup_n_u64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u64 ()
+{
+  uint64_t a = 1;
+  uint64x1_t b;
+  uint64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_u64 (a);
+  vst1_u64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_u64 (uint64_t a)
+{
+  return vdupq_n_u64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u64 ()
+{
+  uint64_t a = 1;
+  uint64x2_t b;
+  uint64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_u64 (a);
+  vst1q_u64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdup_n_f32 ())
+    abort ();
+  if (test_vdup_n_f64 ())
+    abort ();
+  if (test_vdup_n_p8 ())
+    abort ();
+  if (test_vdup_n_u8 ())
+    abort ();
+  if (test_vdup_n_s8 ())
+    abort ();
+  if (test_vdup_n_p16 ())
+    abort ();
+  if (test_vdup_n_s16 ())
+    abort ();
+  if (test_vdup_n_u16 ())
+    abort ();
+  if (test_vdup_n_s32 ())
+    abort ();
+  if (test_vdup_n_u32 ())
+    abort ();
+  if (test_vdup_n_s64 ())
+    abort ();
+  if (test_vdup_n_u64 ())
+    abort ();
+  if (test_vdupq_n_f32 ())
+    abort ();
+  if (test_vdupq_n_f64 ())
+    abort ();
+  if (test_vdupq_n_p8 ())
+    abort ();
+  if (test_vdupq_n_u8 ())
+    abort ();
+  if (test_vdupq_n_s8 ())
+    abort ();
+  if (test_vdupq_n_p16 ())
+    abort ();
+  if (test_vdupq_n_s16 ())
+    abort ();
+  if (test_vdupq_n_u16 ())
+    abort ();
+  if (test_vdupq_n_s32 ())
+    abort ();
+  if (test_vdupq_n_u32 ())
+    abort ();
+  if (test_vdupq_n_s64 ())
+    abort ();
+  if (test_vdupq_n_u64 ())
+    abort ();
+  return 0;
+}
+
+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64.
+   Cannot force floating point value in general purpose regester.  */
+
+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdup_n_s32, test_vdup_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out.
+   Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
+   are not practical.  */
+
+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
@@ -0,0 +1,19 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * without outgoing.
+     * total frame size <= 512 but > 256.
+     * number of callee-save reg >= 2.
+     * we can use "stp !" to optimize stack adjustment.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test4, 400, "x19")
+t_frame_run (test4)
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 " } */
+
+float
+f_1 (float a, float b, float c, float d)
+{
+  if (a > 0.0)
+    return c;
+  else
+    return 2.0;
+}
+
+double
+f_2 (double a, double b, double c, double d)
+{
+  if (a > b)
+    return c;
+  else
+    return d;
+}
+
+/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/pr62178.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/pr62178.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];
+
+void foo (void) {
+  int i, j, k;
+
+  for ( i = 1; i <= 30; i++ )
+    for ( j = 1; j <= 30; j++ ) {
+      r[i][j] = 0;
+      for(k = 1; k <= 30; k++ )
+        r[i][j] += a[i][k]*b[k][j];
+    }
+}
+
+/* { dg-final { scan-assembler "ld1r\\t\{v\[0-9\]+\."} } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps -fno-inline" } */
+
+extern void abort ();
+
+#define TEST(name, subname, count) \
+void \
+count_tz_##name (unsigned *__restrict a, int *__restrict b) \
+{ \
+  int i; \
+  for (i = 0; i < count; i++) \
+    b[i] = __builtin_##subname (a[i]); \
+}
+
+#define CHECK(name, count, input, output) \
+  count_tz_##name (input, output); \
+  for (i = 0; i < count; i++) \
+    { \
+      if (output[i] != r[i]) \
+	abort (); \
+    }
+
+TEST (v4si, ctz, 4)
+TEST (v2si, ctz, 2)
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */
+/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.2s" } } */
+
+int
+main ()
+{
+  unsigned int x4[4] = { 0x0, 0xFF80, 0x1FFFF, 0xFF000000 };
+  int r[4] = { 32, 7, 0, 24 };
+  int d[4], i;
+
+  CHECK (v4si, 4, x4, d);
+  CHECK (v2si, 2, x4, d);
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
@@ -8,11 +8,11 @@
 
 
 #define DEFN_SETV(type) \
-		set_vector_##type (pR##type a, type n)   \
-		{ 				         \
-		  int i;			         \
-		  for (i=0; i<16; i++)		         \
-		    a[i] = n;				 \
+		void set_vector_##type (pR##type a, type n)   \
+		{					      \
+		  int i;				      \
+		  for (i=0; i<16; i++)			      \
+		    a[i] = n;				      \
 		}
 
 #define DEFN_CHECKV(type) \
--- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c
@@ -0,0 +1,59 @@
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef unsigned int __u32;
+
+__u32
+__rev16_32_alt (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
+         | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
+}
+
+__u32
+__rev16_32 (__u32 x)
+{
+  return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
+         | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
+}
+
+typedef unsigned long long __u64;
+
+__u64
+__rev16_64_alt (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
+         | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
+}
+
+__u64
+__rev16_64 (__u64 x)
+{
+  return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
+         | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
+}
+
+int
+main (void)
+{
+  volatile __u32 in32 = 0x12345678;
+  volatile __u32 expected32 = 0x34127856;
+  volatile __u64 in64 = 0x1234567890abcdefUL;
+  volatile __u64 expected64 = 0x34127856ab90efcdUL;
+
+  if (__rev16_32 (in32) != expected32)
+    abort ();
+
+  if (__rev16_32_alt (in32) != expected32)
+    abort ();
+
+  if (__rev16_64 (in64) != expected64)
+    abort ();
+
+  if (__rev16_64_alt (in64) != expected64)
+    abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -std=c99" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define VARIANTS(VARIANT)				\
+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8)		\
+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16)	\
+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32)	\
+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64)	\
+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8)		\
+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16)		\
+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32)		\
+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64)		\
+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32)	\
+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64)
+
+
+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX)	\
+int								\
+test_vget_low_ ##SUFFIX (BASETYPE *data)			\
+{								\
+  BASETYPE temp [NUM64];					\
+  TYPE128 vec = vld1q_##SUFFIX (data);				\
+  TYPE64 high = vget_high_##SUFFIX (vec);			\
+  vst1_##SUFFIX (temp, high);					\
+  for (int i = 0; i < NUM64; i++)				\
+    if (temp[i] != data[i + NUM64])				\
+      return 1;							\
+  return 0;							\
+}
+
+VARIANTS (TESTMETH)
+
+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX)		\
+  if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0)	\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  uint8_t uint8_t_data[16] =
+      { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 };
+  uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 };
+  uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 };
+  uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL };
+  int8_t int8_t_data[16] =
+      { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 };
+  int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
+  int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
+  int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+  float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
+  float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
+
+  VARIANTS (CHECK);
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define VARIANTS(VARIANT, STRUCT)	\
+VARIANT (uint8, , 8, _u8, STRUCT)	\
+VARIANT (uint16, , 4, _u16, STRUCT)	\
+VARIANT (uint32, , 2, _u32, STRUCT)	\
+VARIANT (uint64, , 1, _u64, STRUCT)	\
+VARIANT (int8, , 8, _s8, STRUCT)	\
+VARIANT (int16, , 4, _s16, STRUCT)	\
+VARIANT (int32, , 2, _s32, STRUCT)	\
+VARIANT (int64, , 1, _s64, STRUCT)	\
+VARIANT (poly8, , 8, _p8, STRUCT)	\
+VARIANT (poly16, , 4, _p16, STRUCT)	\
+VARIANT (float32, , 2, _f32, STRUCT)	\
+VARIANT (float64, , 1, _f64, STRUCT)	\
+VARIANT (uint8, q, 16, _u8, STRUCT)	\
+VARIANT (uint16, q, 8, _u16, STRUCT)	\
+VARIANT (uint32, q, 4, _u32, STRUCT)	\
+VARIANT (uint64, q, 2, _u64, STRUCT)	\
+VARIANT (int8, q, 16, _s8, STRUCT)	\
+VARIANT (int16, q, 8, _s16, STRUCT)	\
+VARIANT (int32, q, 4, _s32, STRUCT)	\
+VARIANT (int64, q, 2, _s64, STRUCT)	\
+VARIANT (poly8, q, 16, _p8, STRUCT)	\
+VARIANT (poly16, q, 8, _p16, STRUCT)	\
+VARIANT (float32, q, 4, _f32, STRUCT)	\
+VARIANT (float64, q, 2, _f64, STRUCT)
+
+#define TESTMETH(BASE, Q, ELTS, SUFFIX, STRUCT)	\
+int								\
+test_vld##STRUCT##Q##_dup##SUFFIX (const BASE##_t *data)	\
+{								\
+  BASE##_t temp[ELTS];						\
+  BASE##x##ELTS##x##STRUCT##_t vectors =			\
+			   vld##STRUCT##Q##_dup##SUFFIX (data); \
+  int i,j;							\
+  for (i = 0; i < STRUCT; i++)					\
+    {								\
+      vst1##Q##SUFFIX (temp, vectors.val[i]);			\
+      for (j = 0; j < ELTS; j++)				\
+        if (temp[j] != data[i])					\
+          return 1;						\
+    }								\
+  return 0;							\
+}
+
+/* Tests of vld2_dup and vld2q_dup.  */
+VARIANTS (TESTMETH, 2)
+/* Tests of vld3_dup and vld3q_dup.  */
+VARIANTS (TESTMETH, 3)
+/* Tests of vld4_dup and vld4q_dup.  */
+VARIANTS (TESTMETH, 4)
+
+#define CHECK(BASE, Q, ELTS, SUFFIX, STRUCT)			\
+  if (test_vld##STRUCT##Q##_dup##SUFFIX (BASE ##_data) != 0)	\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  uint8_t uint8_data[4] = { 7, 11, 13, 17 };
+  uint16_t uint16_data[4] = { 257, 263, 269, 271 };
+  uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 };
+  uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL,
+			      0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+  int8_t int8_data[4] = { -1, 3, -5, 7 };
+  int16_t int16_data[4] = { 257, -259, 261, -263 };
+  int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 };
+  int64_t *int64_data = (int64_t *)uint64_data;
+  poly8_t poly8_data[4] = { 0, 7, 13, 18, };
+  poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
+  float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
+  float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
+
+  VARIANTS (CHECK, 2);
+  VARIANTS (CHECK, 3);
+  VARIANTS (CHECK, 4);
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -0,0 +1,343 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
+         : "=w"(V1)                                     \
+         : "w"(V1)                                      \
+         : /* No clobbers */)
+
+extern void abort (void);
+
+float32_t __attribute__ ((noinline))
+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a)
+{
+  return vdups_lane_f32 (a, 0);
+}
+
+float32_t __attribute__ ((noinline))
+wrap_vdups_lane_f32_1 (float32x2_t a)
+{
+  return vdups_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_f32 ()
+{
+  float32x2_t a;
+  float32_t b;
+  float32_t c[2] = { 0.0, 1.0 };
+
+  a = vld1_f32 (c);
+  b = wrap_vdups_lane_f32_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_f32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+float64_t __attribute__ ((noinline))
+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a)
+{
+  return vdupd_lane_f64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_f64 ()
+{
+  float64x1_t a;
+  float64_t b;
+  float64_t c[1] = { 0.0 };
+  a = vld1_f64 (c);
+  b = wrap_vdupd_lane_f64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int8_t __attribute__ ((noinline))
+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a)
+{
+  int8_t result = vdupb_lane_s8 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int8_t __attribute__ ((noinline))
+wrap_vdupb_lane_s8_1 (int8x8_t a)
+{
+  int8_t result = vdupb_lane_s8 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdupb_lane_s8 ()
+{
+  int8x8_t a;
+  int8_t b;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  a = vld1_s8 (c);
+  b = wrap_vdupb_lane_s8_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdupb_lane_s8_1 (a);
+  if (c[1] != b)
+    return 1;
+
+  return 0;
+}
+
+uint8_t __attribute__ ((noinline))
+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a)
+{
+  uint8_t result = vdupb_lane_u8 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint8_t __attribute__ ((noinline))
+wrap_vdupb_lane_u8_1 (uint8x8_t a)
+{
+  uint8_t result = vdupb_lane_u8 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdupb_lane_u8 ()
+{
+  uint8x8_t a;
+  uint8_t b;
+  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  a = vld1_u8 (c);
+  b = wrap_vdupb_lane_u8_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdupb_lane_u8_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int16_t __attribute__ ((noinline))
+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a)
+{
+  int16_t result = vduph_lane_s16 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int16_t __attribute__ ((noinline))
+wrap_vduph_lane_s16_1 (int16x4_t a)
+{
+  int16_t result = vduph_lane_s16 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_s16 ()
+{
+  int16x4_t a;
+  int16_t b;
+  int16_t c[4] = { 0, 1, 2, 3 };
+
+  a = vld1_s16 (c);
+  b = wrap_vduph_lane_s16_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vduph_lane_s16_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint16_t __attribute__ ((noinline))
+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a)
+{
+  uint16_t result = vduph_lane_u16 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint16_t __attribute__ ((noinline))
+wrap_vduph_lane_u16_1 (uint16x4_t a)
+{
+  uint16_t result = vduph_lane_u16 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_u16 ()
+{
+  uint16x4_t a;
+  uint16_t b;
+  uint16_t c[4] = { 0, 1, 2, 3 };
+
+  a = vld1_u16 (c);
+  b = wrap_vduph_lane_u16_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vduph_lane_u16_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int32_t __attribute__ ((noinline))
+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a)
+{
+  int32_t result = vdups_lane_s32 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int32_t __attribute__ ((noinline))
+wrap_vdups_lane_s32_1 (int32x2_t a)
+{
+  int32_t result = vdups_lane_s32 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_s32 ()
+{
+  int32x2_t a;
+  int32_t b;
+  int32_t c[2] = { 0, 1 };
+
+  a = vld1_s32 (c);
+  b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_s32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint32_t __attribute__ ((noinline))
+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a)
+{
+  uint32_t result = vdups_lane_u32 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint32_t __attribute__ ((noinline))
+wrap_vdups_lane_u32_1 (uint32x2_t a)
+{
+  uint32_t result = vdups_lane_u32 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_u32 ()
+{
+  uint32x2_t a;
+  uint32_t b;
+  uint32_t c[2] = { 0, 1 };
+  a = vld1_u32 (c);
+  b = wrap_vdups_lane_u32_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_u32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint64_t __attribute__ ((noinline))
+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a)
+{
+  return vdupd_lane_u64 (a, 0);;
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_u64 ()
+{
+  uint64x1_t a;
+  uint64_t b;
+  uint64_t c[1] = { 0 };
+
+  a = vld1_u64 (c);
+  b = wrap_vdupd_lane_u64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int64_t __attribute__ ((noinline))
+wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a)
+{
+  return vdupd_lane_u64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_s64 ()
+{
+  int64x1_t a;
+  int64_t b;
+  int64_t c[1] = { 0 };
+
+  a = vld1_s64 (c);
+  b = wrap_vdupd_lane_s64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdups_lane_f32 ())
+    abort ();
+  if (test_vdupd_lane_f64 ())
+    abort ();
+  if (test_vdupb_lane_s8 ())
+    abort ();
+  if (test_vdupb_lane_u8 ())
+    abort ();
+  if (test_vduph_lane_s16 ())
+    abort ();
+  if (test_vduph_lane_u16 ())
+    abort ();
+  if (test_vdups_lane_s32 ())
+    abort ();
+  if (test_vdups_lane_u32 ())
+    abort ();
+  if (test_vdupd_lane_s64 ())
+    abort ();
+  if (test_vdupd_lane_u64 ())
+    abort ();
+  return 0;
+}
+
+/* Asm check for vdupb_lane_s8, vdupb_lane_u8.  */
+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */
+
+/* Asm check for vduph_lane_h16, vduph_lane_h16.  */
+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
+
+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64.  */
+/* Attempts to make the compiler generate vdupd are not practical.  */
+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } }
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/bics_4.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/bics_4.c
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+
+extern void abort (void);
+
+int
+bics_si_test1 (int a, int b, int c)
+{
+  if ((a & b) == a)
+    return a;
+  else
+    return c;
+}
+
+int
+bics_si_test2 (int a, int b, int c)
+{
+  if ((a & b) == b)
+    return b;
+  else
+    return c;
+}
+
+typedef long long s64;
+
+s64
+bics_di_test1 (s64 a, s64 b, s64 c)
+{
+  if ((a & b) == a)
+    return a;
+  else
+    return c;
+}
+
+s64
+bics_di_test2 (s64 a, s64 b, s64 c)
+{
+  if ((a & b) == b)
+    return b;
+  else
+    return c;
+}
+
+int
+main ()
+{
+  int x;
+  s64 y;
+
+  x = bics_si_test1 (0xf00d, 0xf11f, 0);
+  if (x != 0xf00d)
+    abort ();
+
+  x = bics_si_test1 (0xf11f, 0xf00d, 0);
+  if (x != 0)
+    abort ();
+
+  x = bics_si_test2 (0xf00d, 0xf11f, 0);
+  if (x != 0)
+    abort ();
+
+  x = bics_si_test2 (0xf11f, 0xf00d, 0);
+  if (x != 0xf00d)
+    abort ();
+
+  y = bics_di_test1 (0x10001000f00dll, 0x12341000f00dll, 0ll);
+  if (y != 0x10001000f00dll)
+    abort ();
+
+  y = bics_di_test1 (0x12341000f00dll, 0x10001000f00dll, 0ll);
+  if (y != 0)
+    abort ();
+
+  y = bics_di_test2 (0x10001000f00dll, 0x12341000f00dll, 0ll);
+  if (y != 0)
+    abort ();
+
+  y = bics_di_test2 (0x12341000f00dll, 0x10001000f00dll, 0ll);
+  if (y != 0x10001000f00dll)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
@@ -0,0 +1,22 @@
+/* Test vbslq_u64 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+#include <arm_neon.h>
+
+/* Folds to BIC.  */
+
+int32x4_t
+half_fold_int (uint32x4_t mask)
+{
+  int32x4_t a = {0, 0, 0, 0};
+  int32x4_t b = {2, 4, 8, 16};
+  return vbslq_s32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline --save-temps" } */
+
+extern void abort (void);
+
+typedef float float32x2_t __attribute__ ((__vector_size__ ((8))));
+typedef unsigned int uint32x2_t __attribute__ ((__vector_size__ ((8))));
+
+float32x2_t
+test_dup_1 (float32x2_t in)
+{
+  return __builtin_shuffle (in, (uint32x2_t) {1, 1});
+}
+
+int
+main (int argc, char **argv)
+{
+  float32x2_t test = {2.718, 3.141};
+  float32x2_t res = test_dup_1 (test);
+  if (res[0] != test[1] || res[1] != test[1])
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "\[ \t\]*dup\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.s\\\[\[01\]\\\]" 1 } } */
+/* { dg-final { scan-assembler-not "zip" } } */
+/* { dg-final { cleanup-saved-temps } } */
+
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c
@@ -0,0 +1,13 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * with outgoing.
+     * total frame size <= 512.
+     * one subtraction of the whole frame size.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test5, 300, "x19", 8, a[8])
+t_frame_run (test5)
--- a/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
@@ -5,48 +5,54 @@
 
 extern void abort (void);
 
-int __attribute__ ((noinline))
-test_vld1_vst1 ()
-{
-  int8x8_t a;
-  int8x8_t b;
-  int i = 0;
-  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-  int8_t d[8];
-  a = vld1_s8 (c);
-  asm volatile ("":::"memory");
-  vst1_s8 (d, a);
-  asm volatile ("":::"memory");
-  for (; i < 8; i++)
-    if (c[i] != d[i])
-      return 1;
-  return 0;
+#define TESTMETH(TYPE, NUM, BASETYPE, SUFFIX)	\
+int __attribute__ ((noinline))			\
+test_vld1_vst1##SUFFIX ()			\
+{						\
+  TYPE vec;					\
+  int i = 0;					\
+  BASETYPE src[NUM];				\
+  BASETYPE dest[NUM];				\
+  for (i = 0; i < NUM; i++)			\
+    src[i] = 2*i + 1;				\
+  asm volatile ("":::"memory");			\
+  vec = vld1 ## SUFFIX (src);			\
+  asm volatile ("":::"memory");			\
+  vst1 ## SUFFIX (dest, vec);			\
+  asm volatile ("":::"memory");			\
+  for (i = 0; i < NUM; i++)			\
+    if (src[i] != dest[i])			\
+      return 1;					\
+  return 0;					\
 }
 
-int __attribute__ ((noinline))
-test_vld1q_vst1q ()
-{
-  int16x8_t a;
-  int16x8_t b;
-  int i = 0;
-  int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-  int16_t d[8];
-  a = vld1q_s16 (c);
-  asm volatile ("":::"memory");
-  vst1q_s16 (d, a);
-  asm volatile ("":::"memory");
-  for (; i < 8; i++)
-    if (c[i] != d[i])
-      return 1;
-  return 0;
-}
+#define VARIANTS(THING)			\
+THING (int8x8_t, 8, int8_t, _s8)	\
+THING (uint8x8_t, 8, uint8_t, _u8)	\
+THING (int16x4_t, 4, int16_t, _s16)	\
+THING (uint16x4_t, 4, uint16_t, _u16)	\
+THING (int32x2_t, 2, int32_t, _s32)	\
+THING (uint32x2_t, 2, uint32_t, _u32)	\
+THING (float32x2_t, 2, float32_t, _f32) \
+THING (int8x16_t, 16, int8_t, q_s8)	\
+THING (uint8x16_t, 16, uint8_t, q_u8)	\
+THING (int16x8_t, 8, int16_t, q_s16)	\
+THING (uint16x8_t, 8, uint16_t, q_u16)	\
+THING (int32x4_t, 4, int32_t, q_s32)	\
+THING (uint32x4_t, 4, uint32_t, q_u32)	\
+THING (int64x2_t, 2, int64_t, q_s64)	\
+THING (uint64x2_t, 2, uint64_t, q_u64)	\
+THING (float64x2_t, 2, float64_t, q_f64)
 
+VARIANTS (TESTMETH)
+
+#define DOTEST(TYPE, NUM, BASETYPE, SUFFIX)	\
+  if (test_vld1_vst1##SUFFIX ())		\
+    abort ();
+
 int
 main ()
 {
-  if (test_vld1_vst1 ())
-    abort ();
-  if (test_vld1q_vst1q ())
-    abort ();
+  VARIANTS (DOTEST);
   return 0;
 }
--- a/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c
@@ -0,0 +1,95 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -fno-inline -O1" } */
+
+#define FCVTDEF(ftype,itype) \
+void \
+cvt_##itype##_to_##ftype (itype a, ftype b)\
+{\
+  ftype c;\
+  c = (ftype) a;\
+  if ( (c - b) > 0.00001) abort();\
+}
+
+#define force_simd_for_float(v) asm volatile ("mov %s0, %1.s[0]" :"=w" (v) :"w" (v) :)
+#define force_simd_for_double(v) asm volatile ("mov %d0, %1.d[0]" :"=w" (v) :"w" (v) :)
+
+#define FCVTDEF_SISD(ftype,itype) \
+void \
+cvt_##itype##_to_##ftype##_sisd (itype a, ftype b)\
+{\
+  ftype c;\
+  force_simd_for_##ftype(a);\
+  c = (ftype) a;\
+  if ( (c - b) > 0.00001) abort();\
+}
+
+#define FCVT(ftype,itype,ival,fval) cvt_##itype##_to_##ftype (ival, fval);
+#define FCVT_SISD(ftype,itype,ival,fval) cvt_##itype##_to_##ftype##_sisd (ival, fval);
+
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef long long int int64_t;
+typedef unsigned long long int uint64_t;
+
+extern void abort();
+
+FCVTDEF (float, int32_t)
+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */
+FCVTDEF (float, uint32_t)
+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */
+FCVTDEF (double, int32_t)
+/* "scvtf\td\[0-9\]+,\ w\[0-9\]+" */
+FCVTDEF (double, uint32_t)
+/* "ucvtf\td\[0-9\]+,\ w\[0-9\]+" */
+FCVTDEF (float, int64_t)
+/* "scvtf\ts\[0-9\]+,\ x\[0-9\]+" */
+FCVTDEF (float, uint64_t)
+/* "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" */
+FCVTDEF (double, int64_t)
+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ x\[0-9\]+" } } */
+FCVTDEF (double, uint64_t)
+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ x\[0-9\]+" } } */
+FCVTDEF_SISD (float, int32_t)
+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */
+FCVTDEF_SISD (double, int64_t)
+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ d\[0-9\]+" } } */
+FCVTDEF_SISD (float, uint32_t)
+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */
+FCVTDEF_SISD (double, uint64_t)
+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ d\[0-9\]+" } } */
+FCVTDEF_SISD (float, int64_t)
+/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */
+FCVTDEF_SISD (float, uint64_t)
+/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */
+FCVTDEF_SISD (double, int32_t)
+/* { dg-final { scan-assembler-times "scvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */
+FCVTDEF_SISD (double, uint32_t)
+/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */
+
+int32_t ival = -1234;
+int64_t llival = -13031303L;
+uint32_t uival = 1234;
+uint64_t ullival = 13031303L;
+
+int main ()
+{
+  float x;
+  double y;
+
+  FCVT (float, int32_t, ival, -1234.0);
+  FCVT (float, uint32_t, uival, 1234.0);
+  FCVT (float, int64_t, llival, -13031303.0);
+  FCVT (float, uint64_t, ullival, 13031303.0);
+  FCVT (double, int32_t, ival, -1234.0);
+  FCVT (double, uint32_t, uival, 1234.0);
+  FCVT (double, int64_t, llival, -13031303.0);
+  FCVT (double, uint64_t, ullival, 13031303.0);
+  FCVT_SISD (float, int32_t, ival, -1234.0);
+  FCVT_SISD (double, int64_t, llival, -13031303.0);
+  FCVT_SISD (float, uint32_t, uival, 1234.0);
+  FCVT_SISD (double, uint64_t, ullival, 13031303.0);
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vqdmulls_lane_s32.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vqdmulls_lane_s32.c
@@ -5,7 +5,7 @@
 
 #include "arm_neon.h"
 
-int64x1_t
+int64_t
 t_vqdmulls_lane_s32 (int32_t a, int32x2_t b)
 {
   return vqdmulls_lane_s32 (a, b, 0);
--- a/src/gcc/testsuite/gcc.target/aarch64/pr64263_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/pr64263_1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O1" } */
+
+#include "arm_neon.h"
+
+extern long int vget_lane_s64_1 (int64x1_t, const int);
+
+void
+foo ()
+{
+  int8x8_t val14;
+  int8x8_t val15;
+  uint8x8_t val16;
+  uint32x4_t val40;
+  val14 = vcreate_s8 (0xff0080f6807f807fUL);
+  val15 = vcreate_s8 (0x10807fff7f808080UL);
+  val16 = vcgt_s8 (val14, val15);
+  val40 = vreinterpretq_u32_u64 (
+    vdupq_n_u64 (
+         vget_lane_s64_1 (
+         vreinterpret_s64_u8 (val16), 0)
+    ));
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c
@@ -17,6 +17,11 @@
 };
 typedef struct _IO_FILE FILE;
 extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream);
+extern void *memset (void *s, int c, size_t n);
+extern void *memcpy (void *dest, const void *src, size_t n);
+extern int fprintf (FILE *stream, const char *format, ...);
+extern char * safe_strncpy (char *dst, const char *src, size_t size);
+extern size_t strlen (const char *s);
 extern struct _IO_FILE *stderr;
 extern int optind;
 struct aftype {
--- a/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef void FP (int);
+
+/* { dg-final { scan-assembler "br" } } */
+/* { dg-final { scan-assembler-not "blr" } } */
+void
+f1 (FP fp, int n)
+{
+  (fp) (n);
+}
+
+void
+f2 (int n, FP fp)
+{
+  (fp) (n);
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+
+#include "arm_neon.h"
+
+int32x4_t
+foo (int32x4_t a, int16x4_t b, int16x4_t c, int d)
+{
+  return vqdmlal_lane_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo1 (int32x4_t a, int16x4_t b, int16x8_t c, int d)
+{
+  return vqdmlal_laneq_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo2 (int32x4_t a, int16x4_t b, int16x4_t c, int d)
+{
+  return vqdmlsl_lane_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo3 (int32x4_t a, int16x4_t b, int16x8_t c, int d)
+{
+  return vqdmlsl_laneq_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo4 (int32x4_t a, int16x8_t b, int16x4_t c, int d)
+{
+  return vqdmlal_high_lane_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo5 (int32x4_t a, int16x8_t b, int16x4_t c, int d)
+{
+  return vqdmlsl_high_lane_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo6 (int32x4_t a, int16x8_t b, int16x8_t c, int d)
+{
+  return vqdmlal_high_laneq_s16 (a, b, c, d);
+}
+
+int32x4_t
+foo7 (int32x4_t a, int16x8_t b, int16x8_t c, int d)
+{
+  return vqdmlsl_high_laneq_s16 (a, b, c, d);
+}
+
+
+/* { dg-excess-errors "incompatible type for argument" } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
@@ -0,0 +1,20 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * without outgoing.
+     * total frame size > 512.
+     * number of callee-saved reg == 1.
+     * split stack adjustment into two subtractions.
+       the second subtraction should use "str !".  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test6, 700, )
+t_frame_run (test6)
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_n.c
@@ -0,0 +1,25 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmls
+#define TEST_MSG "VMLS_N"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfa4b, 0xfa4c, 0xfa4d, 0xfa4e };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff4a6, 0xfffff4a7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xef01, 0xef02, 0xef03, 0xef04 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffe95c, 0xffffe95d };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc49bdeb8, 0xc49bbeb8 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xe3b7, 0xe3b8, 0xe3b9, 0xe3ba,
+					0xe3bb, 0xe3bc, 0xe3bd, 0xe3be };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffde12, 0xffffde13,
+					0xffffde14, 0xffffde15 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd86d, 0xd86e, 0xd86f, 0xd870,
+					 0xd871, 0xd872, 0xd873, 0xd874 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffd2c8, 0xffffd2c9,
+					 0xffffd2ca, 0xffffd2cb };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc56a087b, 0xc569f87b,
+					   0xc569e87b, 0xc569d87b };
+
+#include "vmlX_n.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c
@@ -0,0 +1,38 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vqdmlal_lane
+#define TEST_MSG "VQDMLAL_LANE"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f };
+
+/* Expected values of cumulative_saturation flag when multiplying with
+   0.  */
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat2,int,64,2) = 0;
+
+/* Expected values when multiplying with 0.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff1 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+   saturates.  */
+int VECT_VAR(expected_cumulative_sat3,int,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat3,int,64,2) = 1;
+
+/* Expected values when multiplication saturates.  */
+VECT_VAR_DECL(expected3,int,32,4) [] = { 0x7fffffef, 0x7ffffff0,
+					 0x7ffffff1, 0x7ffffff2 };
+VECT_VAR_DECL(expected3,int,64,2) [] = { 0x7fffffffffffffef,
+					 0x7ffffffffffffff0 };
+
+#include "vqdmlXl_lane.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqadd.c
@@ -0,0 +1,278 @@
+#define INSN_NAME vqadd
+#define TEST_MSG "VQADD/VQADDQ"
+
+/* Extra tests for special cases:
+   - some requiring intermediate types larger than 64 bits to
+   compute saturation flag.
+   - corner case saturations with types smaller than 64 bits.
+*/
+void vqadd_extras(void);
+#define EXTRA_TESTS vqadd_extras
+
+#include "binary_sat_op.inc"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 1;
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4,
+				       0x5, 0x6, 0x7, 0x8 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x12, 0x13, 0x14, 0x15 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x23, 0x24 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x34 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x1, 0x2, 0x3, 0x4,
+					0x5, 0x6, 0x7, 0x8,
+					0x9, 0xa, 0xb, 0xc,
+					0xd, 0xe, 0xf, 0x10 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x12, 0x13, 0x14, 0x15,
+					0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x23, 0x24, 0x25, 0x26 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x34, 0x35 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+					 0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+					 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffff,
+					 0xffffffffffffffff };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+
+/* 64-bits types, with 0 as second input.  */
+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,2) = 0;
+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0xfffffffffffffff0,
+					   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_64,uint,64,2) [] = { 0xfffffffffffffff0,
+					    0xfffffffffffffff1 };
+
+/* 64-bits types, some cases causing cumulative saturation.  */
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,2) = 1;
+VECT_VAR_DECL(expected_64_2,int,64,1) [] = { 0x34 };
+VECT_VAR_DECL(expected_64_2,uint,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_64_2,int,64,2) [] = { 0x34, 0x35 };
+VECT_VAR_DECL(expected_64_2,uint,64,2) [] = { 0xffffffffffffffff,
+					      0xffffffffffffffff };
+
+/* 64-bits types, all causing cumulative saturation.  */
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,2) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,2) = 1;
+VECT_VAR_DECL(expected_64_3,int,64,1) [] = { 0x8000000000000000 };
+VECT_VAR_DECL(expected_64_3,uint,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_64_3,int,64,2) [] = { 0x7fffffffffffffff,
+					     0x7fffffffffffffff };
+VECT_VAR_DECL(expected_64_3,uint,64,2) [] = { 0xffffffffffffffff,
+					      0xffffffffffffffff };
+
+/* smaller types, corner cases causing cumulative saturation. (1)  */
+int VECT_VAR(expected_csat_lt_64_1,int,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,4) = 1;
+VECT_VAR_DECL(expected_lt_64_1,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
+					       0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,4) [] = { 0x8000, 0x8000,
+						0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_lt_64_1,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
+						0x80, 0x80, 0x80, 0x80,
+						0x80, 0x80, 0x80, 0x80,
+						0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,8) [] = { 0x8000, 0x8000,
+						0x8000, 0x8000,
+						0x8000, 0x8000,
+						0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,4) [] = { 0x80000000, 0x80000000,
+						0x80000000, 0x80000000 };
+
+/* smaller types, corner cases causing cumulative saturation. (2)  */
+int VECT_VAR(expected_csat_lt_64_2,uint,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,4) = 1;
+VECT_VAR_DECL(expected_lt_64_2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+						0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,4) [] = { 0xffff, 0xffff,
+						 0xffff, 0xffff };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,2) [] = { 0xffffffff,
+						 0xffffffff };
+VECT_VAR_DECL(expected_lt_64_2,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+						 0xff, 0xff, 0xff, 0xff,
+						 0xff, 0xff, 0xff, 0xff,
+						 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,8) [] = { 0xffff, 0xffff,
+						 0xffff, 0xffff,
+						 0xffff, 0xffff,
+						 0xffff, 0xffff };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+						 0xffffffff, 0xffffffff };
+
+void vqadd_extras(void)
+{
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  /* Initialize input "vector1" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+
+  /* Use a second vector full of 0.  */
+  VDUP(vector2, , int, s, 64, 1, 0);
+  VDUP(vector2, , uint, u, 64, 1, 0);
+  VDUP(vector2, q, int, s, 64, 2, 0);
+  VDUP(vector2, q, uint, u, 64, 2, 0);
+
+#define MSG "64 bits saturation adding zero"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64, MSG);
+
+  /* Another set of tests with non-zero values, some chosen to create
+     overflow.  */
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_2, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_2, MSG);
+
+  /* Another set of tests, with input values chosen to set
+     cumulative_sat in all cases.  */
+  VDUP(vector2, , int, s, 64, 1, 0x8000000000000003LL);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  /* To check positive saturation, we need to write a positive value
+     in vector1.  */
+  VDUP(vector1, q, int, s, 64, 2, 0x4000000000000000LL);
+  VDUP(vector2, q, int, s, 64, 2, 0x4000000000000000LL);
+  VDUP(vector2, q, uint, u, 64, 2, 0x22);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (3)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_3, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_3, MSG);
+
+  /* To improve coverage, check saturation with less than 64 bits
+     too.  */
+  VDUP(vector2, , int, s, 8, 8, 0x81);
+  VDUP(vector2, , int, s, 16, 4, 0x8001);
+  VDUP(vector2, , int, s, 32, 2, 0x80000001);
+  VDUP(vector2, q, int, s, 8, 16, 0x81);
+  VDUP(vector2, q, int, s, 16, 8, 0x8001);
+  VDUP(vector2, q, int, s, 32, 4, 0x80000001);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (1)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_csat_lt_64_1, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_lt_64_1, MSG);
+
+  /* Another set of tests with large vector1 values.  */
+  VDUP(vector1, , uint, u, 8, 8, 0xF0);
+  VDUP(vector1, , uint, u, 16, 4, 0xFFF0);
+  VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFF0);
+  VDUP(vector1, q, uint, u, 8, 16, 0xF0);
+  VDUP(vector1, q, uint, u, 16, 8, 0xFFF0);
+  VDUP(vector1, q, uint, u, 32, 4, 0xFFFFFFF0);
+
+  VDUP(vector2, , uint, u, 8, 8, 0x20);
+  VDUP(vector2, , uint, u, 16, 4, 0x20);
+  VDUP(vector2, , uint, u, 32, 2, 0x20);
+  VDUP(vector2, q, uint, u, 8, 16, 0x20);
+  VDUP(vector2, q, uint, u, 16, 8, 0x20);
+  VDUP(vector2, q, uint, u, 32, 4, 0x20);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_csat_lt_64_2, MSG);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_lt_64_2, MSG);
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c
@@ -0,0 +1,35 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmla
+#define TEST_MSG "VMLA"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2,
+				       0xe3, 0xe4, 0xe5, 0xe6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x2bf7, 0x2bf8 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x20, 0x21, 0x22, 0x23,
+					0x24, 0x25, 0x26, 0x27 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x43ac, 0x43ad };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x43a14e76, 0x43a1ce76 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0x10, 0x11, 0x12,
+					0x13, 0x14, 0x15, 0x16,
+					0x17, 0x18, 0x19, 0x1a,
+					0x1b, 0x1c, 0x1d, 0x1e };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x4830, 0x4831, 0x4832, 0x4833,
+					0x4834, 0x4835, 0x4836, 0x4837 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x470f, 0x4710, 0x4711, 0x4712 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xac, 0xad, 0xae, 0xaf,
+					 0xb0, 0xb1, 0xb2, 0xb3,
+					 0xb4, 0xb5, 0xb6, 0xb7,
+					 0xb8, 0xb9, 0xba, 0xbb };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a,
+					 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3620, 0x3621, 0x3622, 0x3623 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x45f0ae15, 0x45f0b615,
+					   0x45f0be15, 0x45f0c615 };
+
+#include "vmlX.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqsub.c
@@ -0,0 +1,278 @@
+#define INSN_NAME vqsub
+#define TEST_MSG "VQSUB/VQSUBQ"
+
+/* Extra tests for special cases:
+   - some requiring intermediate types larger than 64 bits to
+   compute saturation flag.
+   - corner case saturations with types smaller than 64 bits.
+*/
+void vqsub_extras(void);
+#define EXTRA_TESTS vqsub_extras
+
+#include "binary_sat_op.inc"
+
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2,
+				       0xe3, 0xe4, 0xe5, 0xe6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffce, 0xffcf,
+					0xffd0, 0xffd1 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffbd, 0xffffffbe };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffac };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x9b, 0x9c, 0x9d, 0x9e,
+					0x9f, 0xa0, 0xa1, 0xa2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff8a, 0xff8b,
+					 0xff8c, 0xff8d };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffff79, 0xffffff7a };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff68 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xdf, 0xe0, 0xe1, 0xe2,
+					0xe3, 0xe4, 0xe5, 0xe6,
+					0xe7, 0xe8, 0xe9, 0xea,
+					0xeb, 0xec, 0xed, 0xee };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffce, 0xffcf, 0xffd0, 0xffd1,
+					0xffd2, 0xffd3, 0xffd4, 0xffd5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffbd, 0xffffffbe,
+					0xffffffbf, 0xffffffc0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffac,
+					0xffffffffffffffad };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x9b, 0x9c, 0x9d, 0x9e,
+					 0x9f, 0xa0, 0xa1, 0xa2,
+					 0xa3, 0xa4, 0xa5, 0xa6,
+					 0xa7, 0xa8, 0xa9, 0xaa };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xff8a, 0xff8b, 0xff8c, 0xff8d,
+					 0xff8e, 0xff8f, 0xff90, 0xff91 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffff79, 0xffffff7a,
+					 0xffffff7b, 0xffffff7c };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffff68,
+					 0xffffffffffffff69 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected values of cumulative saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,uint,64,2) = 0;
+
+/* 64-bits types, with 0 as second input.  */
+VECT_VAR_DECL(expected_64,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_64,int,64,2) [] = { 0xfffffffffffffff0,
+					   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_64,uint,64,2) [] = { 0xfffffffffffffff0,
+					    0xfffffffffffffff1 };
+int VECT_VAR(expected_cumulative_sat_64,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64,uint,64,2) = 0;
+
+/* 64-bits types, other cases.  */
+VECT_VAR_DECL(expected_64_2,int,64,1) [] = { 0xffffffffffffffac };
+VECT_VAR_DECL(expected_64_2,uint,64,1) [] = { 0xffffffffffffff68 };
+VECT_VAR_DECL(expected_64_2,int,64,2) [] = { 0xffffffffffffffac,
+					     0xffffffffffffffad };
+VECT_VAR_DECL(expected_64_2,uint,64,2) [] = { 0xffffffffffffff68,
+					      0xffffffffffffff69 };
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,1) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,int,64,2) = 0;
+int VECT_VAR(expected_cumulative_sat_64_2,uint,64,2) = 0;
+
+/* 64-bits types, all causing cumulative saturation.  */
+VECT_VAR_DECL(expected_64_3,int,64,1) [] = { 0x8000000000000000 };
+VECT_VAR_DECL(expected_64_3,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_64_3,int,64,2) [] = { 0x7fffffffffffffff,
+					     0x7fffffffffffffff };
+VECT_VAR_DECL(expected_64_3,uint,64,2) [] = { 0x0, 0x0 };
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,1) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,int,64,2) = 1;
+int VECT_VAR(expected_cumulative_sat_64_3,uint,64,2) = 1;
+
+/* smaller types, corner cases causing cumulative saturation. (1)  */
+VECT_VAR_DECL(expected_lt_64_1,int,8,8) [] = { 0x80, 0x80, 0x80, 0x80,
+					       0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,4) [] = { 0x8000, 0x8000,
+						0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_lt_64_1,int,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
+						0x80, 0x80, 0x80, 0x80,
+						0x80, 0x80, 0x80, 0x80,
+						0x80, 0x80, 0x80, 0x80 };
+VECT_VAR_DECL(expected_lt_64_1,int,16,8) [] = { 0x8000, 0x8000,
+						0x8000, 0x8000,
+						0x8000, 0x8000,
+						0x8000, 0x8000 };
+VECT_VAR_DECL(expected_lt_64_1,int,32,4) [] = { 0x80000000, 0x80000000,
+						0x80000000, 0x80000000 };
+int VECT_VAR(expected_csat_lt_64_1,int,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_1,int,32,4) = 1;
+
+/* smaller types, corner cases causing cumulative saturation. (2)  */
+VECT_VAR_DECL(expected_lt_64_2,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						 0x0, 0x0, 0x0, 0x0,
+						 0x0, 0x0, 0x0, 0x0,
+						 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_lt_64_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+int VECT_VAR(expected_csat_lt_64_2,uint,8,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,4) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,2) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,8,16) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,16,8) = 1;
+int VECT_VAR(expected_csat_lt_64_2,uint,32,4) = 1;
+
+void vqsub_extras(void)
+{
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  /* Initialize input "vector1" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+
+  /* Use a second vector full of 0.  */
+  VDUP(vector2, , int, s, 64, 1, 0x0);
+  VDUP(vector2, , uint, u, 64, 1, 0x0);
+  VDUP(vector2, q, int, s, 64, 2, 0x0);
+  VDUP(vector2, q, uint, u, 64, 2, 0x0);
+
+#define MSG "64 bits saturation when adding zero"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64, MSG);
+
+  /* Another set of tests with non-zero values.  */
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_2, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_2, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_2, MSG);
+
+  /* Another set of tests, with input values chosen to set
+     cumulative_sat in all cases.  */
+  VDUP(vector2, , int, s, 64, 1, 0x7fffffffffffffffLL);
+  VDUP(vector2, , uint, u, 64, 1, 0xffffffffffffffffULL);
+  /* To check positive saturation, we need to write a positive value
+     in vector1.  */
+  VDUP(vector1, q, int, s, 64, 2, 0x3fffffffffffffffLL);
+  VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL);
+  VDUP(vector2, q, uint, u, 64, 2, 0xffffffffffffffffULL);
+
+#undef MSG
+#define MSG "64 bits saturation cumulative_sat (3)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat_64_3, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat_64_3, MSG);
+
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_64_3, MSG);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_64_3, MSG);
+
+  /* To improve coverage, check saturation with less than 64 bits
+     too.  */
+  VDUP(vector2, , int, s, 8, 8, 0x7F);
+  VDUP(vector2, , int, s, 16, 4, 0x7FFF);
+  VDUP(vector2, , int, s, 32, 2, 0x7FFFFFFF);
+  VDUP(vector2, q, int, s, 8, 16, 0x7F);
+  VDUP(vector2, q, int, s, 16, 8, 0x7FFF);
+  VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (1)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_csat_lt_64_1, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_csat_lt_64_1, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_lt_64_1, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_lt_64_1, MSG);
+
+  /* Another set of tests with vector1 values smaller than
+     vector2.  */
+  VDUP(vector1, , uint, u, 8, 8, 0x10);
+  VDUP(vector1, , uint, u, 16, 4, 0x10);
+  VDUP(vector1, , uint, u, 32, 2, 0x10);
+  VDUP(vector1, q, uint, u, 8, 16, 0x10);
+  VDUP(vector1, q, uint, u, 16, 8, 0x10);
+  VDUP(vector1, q, uint, u, 32, 4, 0x10);
+
+  VDUP(vector2, , uint, u, 8, 8, 0x20);
+  VDUP(vector2, , uint, u, 16, 4, 0x20);
+  VDUP(vector2, , uint, u, 32, 2, 0x20);
+  VDUP(vector2, q, uint, u, 8, 16, 0x20);
+  VDUP(vector2, q, uint, u, 16, 8, 0x20);
+  VDUP(vector2, q, uint, u, 32, 4, 0x20);
+
+#undef MSG
+#define MSG "less than 64 bits saturation cumulative_sat (2)"
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8, expected_csat_lt_64_2, MSG);
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_csat_lt_64_2, MSG);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_lt_64_2, MSG);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_lt_64_2, MSG);
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c
@@ -0,0 +1,92 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x44000, 0x44000,
+					0x44000, 0x44000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xaa000, 0xaa000 };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+					 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fffffffffffffff,
+					 0x7fffffffffffffff };
+
+#define INSN_NAME vqdmull
+#define TEST_MSG "VQDMULL_N"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* vector_res = vqdmull_n(vector,val), then store the result.  */
+#define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N));	\
+  VECT_VAR(vector_res, T1, W2, N) =				\
+    INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N),		\
+		     L);					\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N),			\
+		 VECT_VAR(vector_res, T1, W2, N));		\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQDMULL_N1(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMULL_N(T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULL_N1(INSN_NAME, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  clean_results ();
+
+  /* Initialize vector.  */
+  VDUP(vector, , int, s, 16, 4, 0x1000);
+  VDUP(vector, , int, s, 32, 2, 0x1000);
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x4);
+  VDUP(vector2, , int, s, 32, 2, 0x2);
+
+  /* Choose multiplier arbitrarily.  */
+  TEST_VQDMULL_N(int, s, 16, 32, 4, 0x22, expected_cumulative_sat, "");
+  TEST_VQDMULL_N(int, s, 32, 64, 2, 0x55, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+
+#define TEST_MSG2 "with saturation"
+  TEST_VQDMULL_N(int, s, 16, 32, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULL_N(int, s, 32, 64, 2, 0x80000000, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h
@@ -0,0 +1,193 @@
+/* This file contains input data static definitions, shared by most of
+   the tests.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+
+/* Initialization helpers; 4 slices are needed for vld2, vld3 and
+   vld4.  */
+#define MY_INIT_TABLE(T,W,N) xNAME(INIT_TABLE,N)(T##W##_t)
+#define MY_INIT_TABLE2(T,W,N) xNAME(INIT_TABLE2,N)(T##W##_t)
+#define MY_INIT_TABLE3(T,W,N) xNAME(INIT_TABLE3,N)(T##W##_t)
+#define MY_INIT_TABLE4(T,W,N) xNAME(INIT_TABLE4,N)(T##W##_t)
+
+/* Initialized input buffers.  */
+#define VECT_VAR_DECL_INIT(V, T, W, N)			\
+  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,N) }
+
+/* Specialized initializer with 4 entries, as used by vldX_dup and
+   vdup tests, which iterate 4 times on input buffers.  */
+#define VECT_VAR_DECL_INIT4(V, T, W, N)			\
+  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,4) };
+
+/* Initializers for arrays of vectors.  */
+#define VECT_ARRAY_INIT2(V, T, W, N)		\
+  T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] =	\
+  { MY_INIT_TABLE(T,W,N)			\
+    MY_INIT_TABLE2(T,W,N) }
+
+#define VECT_ARRAY_INIT3(V, T, W, N)			\
+  T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] =		\
+  { MY_INIT_TABLE(T,W,N)				\
+    MY_INIT_TABLE2(T,W,N)				\
+    MY_INIT_TABLE3(T,W,N) }
+
+#define VECT_ARRAY_INIT4(V, T, W, N)			\
+  T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] =		\
+  { MY_INIT_TABLE(T,W,N)				\
+    MY_INIT_TABLE2(T,W,N)				\
+    MY_INIT_TABLE3(T,W,N)				\
+    MY_INIT_TABLE4(T,W,N) }
+
+/* Sample initialization vectors.  */
+#define INIT_TABLE_1(T)				\
+  (T)-16,
+#define INIT_TABLE2_1(T)			\
+  (T)-15,
+#define INIT_TABLE3_1(T)			\
+  (T)-14,
+#define INIT_TABLE4_1(T)			\
+  (T)-13,
+
+#define INIT_TABLE_2(T)				\
+  (T)-16, (T)-15,
+#define INIT_TABLE2_2(T)			\
+  (T)-14, (T)-13,
+#define INIT_TABLE3_2(T)			\
+  (T)-12, (T)-11,
+#define INIT_TABLE4_2(T)			\
+  (T)-10, (T)-9,
+
+/* Initializer for vld3_lane tests.  */
+#define INIT_TABLE_3(T)				\
+  (T)-16, (T)-15, (T)-14,
+
+#define INIT_TABLE_4(T)				\
+  (T)-16, (T)-15, (T)-14, (T)-13,
+#define INIT_TABLE2_4(T)			\
+  (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TABLE3_4(T)			\
+  (T)-8, (T)-7, (T)-6, (T)-5,
+#define INIT_TABLE4_4(T)			\
+  (T)-4, (T)-3, (T)-2, (T)-1,
+
+#define INIT_TABLE_8(T)							\
+  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TABLE2_8(T)					\
+  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TABLE3_8(T)				\
+  (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,
+#define INIT_TABLE4_8(T)				\
+  (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+
+#define INIT_TABLE_16(T)						\
+  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,	\
+  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TABLE2_16(T)						\
+  (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,			\
+  (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+#define INIT_TABLE3_16(T)						\
+  (T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23,		\
+   (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31,
+#define INIT_TABLE4_16(T)						\
+  (T)32, (T)33, (T)34, (T)35, (T)36, (T)37, (T)38, (T)39,		\
+  (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47,
+
+/* This one is used for padding between input buffers.  */
+#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42
+
+/* Input buffers, one of each size.  */
+/* Insert some padding to try to exhibit out of bounds accesses.  */
+VECT_VAR_DECL_INIT(buffer, int, 8, 8);
+PAD(buffer_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer, int, 16, 4);
+PAD(buffer_pad, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer, int, 32, 2);
+PAD(buffer_pad, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 64, 1);
+PAD(buffer_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
+PAD(buffer_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
+PAD(buffer_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
+PAD(buffer_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
+PAD(buffer_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
+PAD(buffer_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
+PAD(buffer_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer, float, 32, 2);
+PAD(buffer_pad, float, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 8, 16);
+PAD(buffer_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer, int, 16, 8);
+PAD(buffer_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer, int, 32, 4);
+PAD(buffer_pad, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer, int, 64, 2);
+PAD(buffer_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
+PAD(buffer_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
+PAD(buffer_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
+PAD(buffer_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
+PAD(buffer_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
+PAD(buffer_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
+PAD(buffer_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer, float, 32, 4);
+PAD(buffer_pad, float, 32, 4);
+
+/* The tests for vld1_dup and vdup expect at least 4 entries in the
+   input buffer, so force 1- and 2-elements initializers to have 4
+   entries (using VECT_VAR_DECL_INIT4).  */
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
+
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
@@ -0,0 +1,98 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7,
+					0x11, 0x11, 0x11, 0x11,
+					0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0x22, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, 0x44 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, 0x88 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					   0x40533333, 0x40533333 };
+
+#define TEST_MSG "VCOMBINE"
+void exec_vcombine (void)
+{
+  /* Basic test: vec128=vcombine(vec64_a, vec64_b), then store the result.  */
+#define TEST_VCOMBINE(T1, T2, W, N, N2)					\
+  VECT_VAR(vector128, T1, W, N2) =					\
+    vcombine_##T2##W(VECT_VAR(vector64_a, T1, W, N),			\
+		     VECT_VAR(vector64_b, T1, W, N));			\
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vector128, T1, W, N2))
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64_a);
+  DECL_VARIABLE_64BITS_VARIANTS(vector64_b);
+  DECL_VARIABLE_128BITS_VARIANTS(vector128);
+
+  /* Initialize input "vector64_a" from "buffer".  */
+  TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64_a, buffer);
+  VLOAD(vector64_a, buffer, , float, f, 32, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector64_b, , int, s, 8, 8, 0x11);
+  VDUP(vector64_b, , int, s, 16, 4, 0x22);
+  VDUP(vector64_b, , int, s, 32, 2, 0x33);
+  VDUP(vector64_b, , int, s, 64, 1, 0x44);
+  VDUP(vector64_b, , uint, u, 8, 8, 0x55);
+  VDUP(vector64_b, , uint, u, 16, 4, 0x66);
+  VDUP(vector64_b, , uint, u, 32, 2, 0x77);
+  VDUP(vector64_b, , uint, u, 64, 1, 0x88);
+  VDUP(vector64_b, , poly, p, 8, 8, 0x55);
+  VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+  VDUP(vector64_b, , float, f, 32, 2, 3.3f);
+
+  clean_results ();
+
+  /* Execute the tests.  */
+  TEST_VCOMBINE(int, s, 8, 8, 16);
+  TEST_VCOMBINE(int, s, 16, 4, 8);
+  TEST_VCOMBINE(int, s, 32, 2, 4);
+  TEST_VCOMBINE(int, s, 64, 1, 2);
+  TEST_VCOMBINE(uint, u, 8, 8, 16);
+  TEST_VCOMBINE(uint, u, 16, 4, 8);
+  TEST_VCOMBINE(uint, u, 32, 2, 4);
+  TEST_VCOMBINE(uint, u, 64, 1, 2);
+  TEST_VCOMBINE(poly, p, 8, 8, 16);
+  TEST_VCOMBINE(poly, p, 16, 4, 8);
+  TEST_VCOMBINE(float, f, 32, 2, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vcombine ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubhn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubhn.c
@@ -0,0 +1,24 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+#define INSN_NAME vsubhn
+#define TEST_MSG "VSUBHN"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x31, 0x31, 0x31, 0x31,
+				       0x31, 0x31, 0x31, 0x31 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x31, 0x31, 0x31, 0x31 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x17, 0x17 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2,
+					0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x36, 0x36, 0x36, 0x36 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x2, 0x2 };
+
+#include "vXXXhn.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc
@@ -0,0 +1,61 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vmlxl_n(vector, vector2, val),
+     then store the result.  */
+#define TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V)			\
+  VECT_VAR(vector_res, T1, W, N) = INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \
+						   VECT_VAR(vector2, T1, W2, N), \
+						   V);			\
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLXL_N(INSN, T1, T2, W, W2, N, V)			\
+  TEST_VMLXL_N1(INSN, T1, T2, W, W2, N, V)
+
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  DECL_VARIABLE(vector, uint, 64, 2);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  VDUP(vector2, , int, s, 16, 4, 0x55);
+  VDUP(vector2, , int, s, 32, 2, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x55);
+
+  /* Choose multiplier arbitrarily.  */
+  TEST_VMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x11);
+  TEST_VMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x22);
+  TEST_VMLXL_N(INSN_NAME, uint, u, 32, 16, 4, 0x33);
+  TEST_VMLXL_N(INSN_NAME, uint, u, 64, 32, 2, 0x33);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadal.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadal.c
@@ -0,0 +1,141 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffd1, 0xffffffd6 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffd1 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffd1, 0x1ffd6 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffd1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffd1, 0xffd6, 0xffdb, 0xffe0,
+					0xffe5, 0xffea, 0xffef, 0xfff4 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd1, 0xffffffd6,
+					0xffffffdb, 0xffffffe0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd1, 0xffffffffffffffd6 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1d1, 0x1d6, 0x1db, 0x1e0,
+					 0x1e5, 0x1ea, 0x1ef, 0x1f4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffd1, 0x1ffd6, 0x1ffdb, 0x1ffe0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffd1, 0x1ffffffd6 };
+
+#define INSN_NAME vpadal
+#define TEST_MSG "VPADAL/VPADALQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=OP(x), then store the result.  */
+#define TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2)			\
+  VECT_VAR(vector_res, T1, W2, N2) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W2, N2), VECT_VAR(vector2, T1, W, N)); \
+  vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2),			\
+		     VECT_VAR(vector_res, T1, W2, N2))
+
+#define TEST_VPADAL(INSN, Q, T1, T2, W, N, W2, N2)	\
+  TEST_VPADAL1(INSN, Q, T1, T2, W, N, W2, N2)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 64, 1);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, uint, 64, 1);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, uint, 64, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+  DECL_VARIABLE(vector2, int, 8, 16);
+  DECL_VARIABLE(vector2, int, 16, 8);
+  DECL_VARIABLE(vector2, int, 32, 4);
+  DECL_VARIABLE(vector2, uint, 8, 16);
+  DECL_VARIABLE(vector2, uint, 16, 8);
+  DECL_VARIABLE(vector2, uint, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 1);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 64, 1);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , int, s, 64, 1);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 64, 1);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  /* Initialize input "vector2" from "buffer".  */
+  VLOAD(vector2, buffer, , int, s, 8, 8);
+  VLOAD(vector2, buffer, , int, s, 16, 4);
+  VLOAD(vector2, buffer, , int, s, 32, 2);
+  VLOAD(vector2, buffer, , uint, u, 8, 8);
+  VLOAD(vector2, buffer, , uint, u, 16, 4);
+  VLOAD(vector2, buffer, , uint, u, 32, 2);
+  VLOAD(vector2, buffer, q, int, s, 8, 16);
+  VLOAD(vector2, buffer, q, int, s, 16, 8);
+  VLOAD(vector2, buffer, q, int, s, 32, 4);
+  VLOAD(vector2, buffer, q, uint, u, 8, 16);
+  VLOAD(vector2, buffer, q, uint, u, 16, 8);
+  VLOAD(vector2, buffer, q, uint, u, 32, 4);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_VPADAL(INSN_NAME, , int, s, 8, 8, 16, 4);
+  TEST_VPADAL(INSN_NAME, , int, s, 16, 4, 32, 2);
+  TEST_VPADAL(INSN_NAME, , int, s, 32, 2, 64 ,1);
+  TEST_VPADAL(INSN_NAME, , uint, u, 8, 8, 16, 4);
+  TEST_VPADAL(INSN_NAME, , uint, u, 16, 4, 32, 2);
+  TEST_VPADAL(INSN_NAME, , uint, u, 32, 2, 64, 1);
+  TEST_VPADAL(INSN_NAME, q, int, s, 8, 16, 16, 8);
+  TEST_VPADAL(INSN_NAME, q, int, s, 16, 8, 32, 4);
+  TEST_VPADAL(INSN_NAME, q, int, s, 32, 4, 64 ,2);
+  TEST_VPADAL(INSN_NAME, q, uint, u, 8, 16, 16, 8);
+  TEST_VPADAL(INSN_NAME, q, uint, u, 16, 8, 32, 4);
+  TEST_VPADAL(INSN_NAME, q, uint, u, 32, 4, 64, 2);
+
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  exec_vpadal ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcls.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcls.c
@@ -0,0 +1,174 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x7, 0x7, 0x7, 0x7,
+					0x7, 0x7, 0x7, 0x7,
+					0x7, 0x7, 0x7, 0x7,
+					0x7, 0x7, 0x7, 0x7 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2,
+					0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x14, 0x14, 0x14, 0x14 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results with negative input.  */
+VECT_VAR_DECL(expected_with_negative,int,8,8) [] = { 0x7, 0x7, 0x7, 0x7,
+						     0x7, 0x7, 0x7, 0x7 };
+VECT_VAR_DECL(expected_with_negative,int,16,4) [] = { 0x1, 0x1, 0x1, 0x1 };
+VECT_VAR_DECL(expected_with_negative,int,32,2) [] = { 0x1, 0x1 };
+VECT_VAR_DECL(expected_with_negative,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_with_negative,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+						      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_negative,uint,16,4) [] = { 0x3333, 0x3333,
+						       0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_negative,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_with_negative,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_with_negative,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+						      0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_negative,poly,16,4) [] = { 0x3333, 0x3333,
+						       0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_negative,hfloat,32,2) [] = { 0x33333333,
+							 0x33333333 };
+VECT_VAR_DECL(expected_with_negative,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						      0x0, 0x0, 0x0, 0x0,
+						      0x0, 0x0, 0x0, 0x0,
+						      0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_with_negative,int,16,8) [] = { 0x2, 0x2, 0x2, 0x2,
+						      0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected_with_negative,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_with_negative,int,64,2) [] = { 0x3333333333333333,
+						      0x3333333333333333 };
+VECT_VAR_DECL(expected_with_negative,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_negative,uint,16,8) [] = { 0x3333, 0x3333,
+						       0x3333, 0x3333,
+						       0x3333, 0x3333,
+						       0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_negative,uint,32,4) [] = { 0x33333333, 0x33333333,
+						       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_with_negative,uint,64,2) [] = { 0x3333333333333333,
+						       0x3333333333333333 };
+VECT_VAR_DECL(expected_with_negative,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_negative,poly,16,8) [] = { 0x3333, 0x3333,
+						       0x3333, 0x3333,
+						       0x3333, 0x3333,
+						       0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_negative,hfloat,32,4) [] = { 0x33333333,
+							 0x33333333,
+							 0x33333333,
+							 0x33333333 };
+
+#define INSN_NAME vcls
+#define TEST_MSG "VCLS/VCLSQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vcls(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)	\
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)		\
+
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Fill input vector with arbitrary values.  */
+  VDUP(vector, , int, s, 8, 8, 0x1);
+  VDUP(vector, , int, s, 16, 4, 0x1234);
+  VDUP(vector, , int, s, 32, 2, 0x34);
+  VDUP(vector, q, int, s, 8, 16, 0);
+  VDUP(vector, q, int, s, 16, 8, 0x1234);
+  VDUP(vector, q, int, s, 32, 4, 0x678);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, " (positive input)");
+
+  /* Fill input vector with arbitrary values (negative).  */
+  VDUP(vector, , int, s, 8, 8, 0xFF);
+  VDUP(vector, , int, s, 16, 4, 0xC234);
+  VDUP(vector, , int, s, 32, 2, 0xDEAD0034);
+  VDUP(vector, q, int, s, 8, 16, 0x80);
+  VDUP(vector, q, int, s, 16, 8, 0xE234);
+  VDUP(vector, q, int, s, 32, 4, 0xBEEF0678);
+
+  /* Apply a unary operator named INSN_NAME  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_with_negative, " (negative input)");
+}
+
+int main (void)
+{
+  exec_vcls ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c
@@ -0,0 +1,34 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vrhadd
+#define TEST_MSG "VRHADD/VRHADDQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+				       0xf4, 0xf4, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+					0xf4, 0xf4, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf3, 0xf3, 0xf4,
+					0xf4, 0xf5, 0xf5, 0xf6,
+					0xf6, 0xf7, 0xf7, 0xf8,
+					0xf8, 0xf9, 0xf9, 0xfa };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff3, 0xfff3,
+					0xfff4, 0xfff4, 0xfff5, 0xfff5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf6, 0xf6,
+					 0xf7, 0xf7, 0xf8, 0xf8,
+					 0xf9, 0xf9, 0xfa, 0xfa,
+					 0xfb, 0xfb, 0xfc, 0xfc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3,
+					 0xfff3, 0xfff4, 0xfff4, 0xfff5 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff2, 0xfffffff2 };
+
+#include "binary_op_no64.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c
@@ -0,0 +1,96 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+					0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
+					 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6,
+					 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6, 0x6 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#define INSN_NAME vcnt
+#define TEST_MSG "VCNT/VCNTQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vcnt(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)	\
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)		\
+
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, poly, 8, 8);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, poly, 8, 16);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, poly, 8, 8);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+  DECL_VARIABLE(vector_res, poly, 8, 16);
+
+  clean_results ();
+
+  /* Fill input vector with arbitrary values.  */
+  VDUP(vector, , int, s, 8, 8, 0xFF);
+  VDUP(vector, , uint, u, 8, 8, 0x35);
+  VDUP(vector, , poly, p, 8, 8, 0x35);
+  VDUP(vector, q, int, s, 8, 16, 0);
+  VDUP(vector, q, uint, u, 8, 16, 0xBD);
+  VDUP(vector, q, poly, p, 8, 16, 0xBD);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , poly, p, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, poly, p, 8, 16);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vcnt ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c
@@ -0,0 +1,127 @@
+#define INSN_NAME vqabs
+#define TEST_MSG "VQABS/VQABSQ"
+
+/* Extra tests for functions requiring corner cases tests.  */
+void vqabs_extra(void);
+#define EXTRA_TESTS vqabs_extra
+
+#include "unary_sat_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd,
+					0xc, 0xb, 0xa, 0x9,
+					0x8, 0x7, 0x6, 0x5,
+					0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+					0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results when input is the min negative value of the type.  */
+VECT_VAR_DECL(expected_min_neg,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+					       0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,4) [] = { 0x7fff, 0x7fff,
+						0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected_min_neg,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+						0x7f, 0x7f, 0x7f, 0x7f,
+						0x7f, 0x7f, 0x7f, 0x7f,
+						0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,8) [] = { 0x7fff, 0x7fff,
+						0x7fff, 0x7fff,
+						0x7fff, 0x7fff,
+						0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+						0x7fffffff, 0x7fffffff };
+
+/* Expected values of cumulative_saturation flag when input is the min
+   negative value of the type.  */
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,16) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,4) = 1;
+
+void vqabs_extra()
+{
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" with min negative values to check
+     saturation.  */
+  VDUP(vector, , int, s, 8, 8, 0x80);
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 8, 16, 0x80);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+
+#define MSG "min negative input"
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG);
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_n.c
@@ -0,0 +1,61 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x11000, 0x11000, 0x11000, 0x11000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x22000, 0x22000 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33000, 0x33000, 0x33000, 0x33000 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x44000, 0x44000 };
+
+#define INSN_NAME vmull
+#define TEST_MSG "VMULL_N"
+void exec_vmull_n (void)
+{
+  int i;
+
+  /* vector_res = vmull_n(vector,val), then store the result.  */
+#define TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L)			\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		     L);						\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VMULL_N(INSN, T1, T2, W, W2, N, L)	\
+  TEST_VMULL_N1(INSN, T1, T2, W, W2, N, L)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize vector.  */
+  VDUP(vector, , int, s, 16, 4, 0x1000);
+  VDUP(vector, , int, s, 32, 2, 0x1000);
+  VDUP(vector, , uint, u, 16, 4, 0x1000);
+  VDUP(vector, , uint, u, 32, 2, 0x1000);
+
+  /* Choose multiplier arbitrarily.  */
+  TEST_VMULL_N(INSN_NAME, int, s, 16, 32, 4, 0x11);
+  TEST_VMULL_N(INSN_NAME, int, s, 32, 64, 2, 0x22);
+  TEST_VMULL_N(INSN_NAME, uint, u, 16, 32, 4, 0x33);
+  TEST_VMULL_N(INSN_NAME, uint, u, 32, 64, 2, 0x44);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  exec_vmull_n ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc
@@ -0,0 +1,73 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vqdmlXl_lane(vector, vector3, vector4, lane),
+     then store the result.  */
+#define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##_##T2##W2(VECT_VAR(vector, T1, W, N),				\
+		    VECT_VAR(vector3, T1, W2, N),			\
+		    VECT_VAR(vector4, T1, W2, N),			\
+		    V);							\
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N),				\
+		VECT_VAR(vector_res, T1, W, N));			\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMLXL_LANE(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector3, int, 16, 4);
+  DECL_VARIABLE(vector4, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector3, int, 32, 2);
+  DECL_VARIABLE(vector4, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+
+  VDUP(vector3, , int, s, 16, 4, 0x55);
+  VDUP(vector4, , int, s, 16, 4, 0xBB);
+  VDUP(vector3, , int, s, 32, 2, 0x55);
+  VDUP(vector4, , int, s, 32, 2, 0xBB);
+
+  TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0, expected_cumulative_sat, "");
+  TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+
+#define TEST_MSG2 "(mul with input=0)"
+  VDUP(vector3, , int, s, 16, 4, 0);
+  VDUP(vector3, , int, s, 32, 2, 0);
+  TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2);
+
+#define TEST_MSG3 "(mul with saturation)"
+  VDUP(vector3, , int, s, 16, 4, 0x8000);
+  VDUP(vector3, , int, s, 32, 2, 0x80000000);
+  VDUP(vector4, , int, s, 16, 4, 0x8000);
+  VDUP(vector4, , int, s, 32, 2, 0x80000000);
+  TEST_VQDMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 0, expected_cumulative_sat3, TEST_MSG3);
+  TEST_VQDMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 0, expected_cumulative_sat3, TEST_MSG3);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected3, TEST_MSG3);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected3, TEST_MSG3);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc
@@ -0,0 +1,63 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = OP(vector, vector3, vector4),
+     then store the result.  */
+#define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##_##T2##W2(VECT_VAR(vector, T1, W, N),				\
+		    VECT_VAR(vector3, T1, W2, N),			\
+		    VECT_VAR(vector4, T1, W2, N));			\
+    vst1q_##T2##W(VECT_VAR(result, T1, W, N),				\
+		  VECT_VAR(vector_res, T1, W, N));			\
+    CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMLXL(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMLXL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector3, int, 16, 4);
+  DECL_VARIABLE(vector4, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector3, int, 32, 2);
+  DECL_VARIABLE(vector4, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+
+  VDUP(vector3, , int, s, 16, 4, 0x55);
+  VDUP(vector4, , int, s, 16, 4, 0xBB);
+  VDUP(vector3, , int, s, 32, 2, 0x55);
+  VDUP(vector4, , int, s, 32, 2, 0xBB);
+
+  TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4, expected_cumulative_sat, "");
+  TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+
+  VDUP(vector3, , int, s, 16, 4, 0x8000);
+  VDUP(vector4, , int, s, 16, 4, 0x8000);
+  VDUP(vector3, , int, s, 32, 2, 0x80000000);
+  VDUP(vector4, , int, s, 32, 2, 0x80000000);
+
+#define TEST_MSG2 "with saturation"
+  TEST_VQDMLXL(INSN_NAME, int, s, 32, 16, 4, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMLXL(INSN_NAME, int, s, 64, 32, 2, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc
@@ -0,0 +1,70 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vaddl(x1,x2), then store the result.  */
+#define TEST_VADDL1(INSN, T1, T2, W, W2, N)				\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    INSN##_##T2##W(VECT_VAR(vector, T1, W, N),				\
+		   VECT_VAR(vector2, T1, W, N));			\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VADDL(INSN, T1, T2, W, W2, N)	\
+  TEST_VADDL1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+
+  /* Execute the tests.  */
+  TEST_VADDL(INSN_NAME, int, s, 8, 16, 8);
+  TEST_VADDL(INSN_NAME, int, s, 16, 32, 4);
+  TEST_VADDL(INSN_NAME, int, s, 32, 64, 2);
+  TEST_VADDL(INSN_NAME, uint, u, 8, 16, 8);
+  TEST_VADDL(INSN_NAME, uint, u, 16, 32, 4);
+  TEST_VADDL(INSN_NAME, uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls_lane.c
@@ -0,0 +1,25 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmls
+#define TEST_MSG "VMLS_LANE"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffc1d9, 0xffffc1da };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffc1d9, 0xffffc1da };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc420c687, 0xc4208687 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc,
+					0xc1dd, 0xc1de, 0xc1df, 0xc1e0 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da,
+					0xffffc1db, 0xffffc1dc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc,
+					 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da,
+					 0xffffc1db, 0xffffc1dc };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4223168, 0xc421f168,
+					   0xc421b168, 0xc4217168 };
+
+#include "vmlX_lane.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmls.c
@@ -0,0 +1,37 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmls
+#define TEST_MSG "VMLS"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x1, 0x2, 0x3, 0x4,
+				       0x5, 0x6, 0x7, 0x8 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xe054, 0xe055, 0xe056, 0xe057 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffd3e9, 0xffffd3ea };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0xc1, 0xc2, 0xc3,
+					0xc4, 0xc5, 0xc6, 0xc7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffbc34, 0xffffbc35 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b14e76, 0xc3b0ce76 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xd1, 0xd2, 0xd3, 0xd4,
+					0xd5, 0xd6, 0xd7, 0xd8,
+					0xd9, 0xda, 0xdb, 0xdc,
+					0xdd, 0xde, 0xdf, 0xe0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xb7b0, 0xb7b1, 0xb7b2, 0xb7b3,
+					0xb7b4, 0xb7b5, 0xb7b6, 0xb7b7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffb8d1, 0xffffb8d2,
+					0xffffb8d3, 0xffffb8d4 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x34, 0x35, 0x36, 0x37,
+					 0x38, 0x39, 0x3a, 0x3b,
+					 0x3c, 0x3d, 0x3e, 0x3f,
+					 0x40, 0x41, 0x42, 0x43 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc,
+					 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc9c0, 0xffffc9c1,
+					 0xffffc9c2, 0xffffc9c3 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc5f1ae15, 0xc5f1a615,
+					   0xc5f19e15, 0xc5f19615 };
+
+#include "vmlX.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_lane.c
@@ -0,0 +1,40 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vqdmlsl_lane
+#define TEST_MSG "VQDMLSL_LANE"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffff83c2, 0xffff83c3,
+					0xffff83c4, 0xffff83c5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffff83c2,
+					0xffffffffffff83c3 };
+
+/* Expected values of cumulative_saturation flag when multiplying with
+   0.  */
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat2,int,64,2) = 0;
+
+/* Expected values when multiplying with 0.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff1 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+   saturates.  */
+int VECT_VAR(expected_cumulative_sat3,int,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat3,int,64,2) = 1;
+
+/* Expected values when multiplication saturates.  */
+VECT_VAR_DECL(expected3,int,32,4) [] = { 0x80000000, 0x80000000,
+					 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected3,int,64,2) [] = { 0x8000000000000000,
+					 0x8000000000000000 };
+
+#include "vqdmlXl_lane.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c
@@ -0,0 +1,185 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Expected results for vcvt.  */
+VECT_VAR_DECL(expected_s,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_u,hfloat,32,2) [] = { 0x4f800000, 0x4f800000 };
+VECT_VAR_DECL(expected_s,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					   0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_u,hfloat,32,4) [] = { 0x4f800000, 0x4f800000,
+					   0x4f800000, 0x4f800000 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x5 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0xf, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xf, 0x0 };
+
+/* Expected results for vcvt_n.  */
+VECT_VAR_DECL(expected_vcvt_n_s,hfloat,32,2) [] = { 0xc0800000, 0xc0700000 };
+VECT_VAR_DECL(expected_vcvt_n_u,hfloat,32,2) [] = { 0x4c000000, 0x4c000000 };
+VECT_VAR_DECL(expected_vcvt_n_s,hfloat,32,4) [] = { 0xb2800000, 0xb2700000,
+						  0xb2600000, 0xb2500000 };
+VECT_VAR_DECL(expected_vcvt_n_u,hfloat,32,4) [] = { 0x49800000, 0x49800000,
+						  0x49800000, 0x49800000 };
+VECT_VAR_DECL(expected_vcvt_n,int,32,2) [] = { 0xff0b3333, 0x54cccd };
+VECT_VAR_DECL(expected_vcvt_n,uint,32,2) [] = { 0x0, 0x15 };
+VECT_VAR_DECL(expected_vcvt_n,int,32,4) [] = { 0x0, 0x0, 0x1e3d7, 0xfffe1c29 };
+VECT_VAR_DECL(expected_vcvt_n,uint,32,4) [] = { 0x0, 0x0, 0x1e, 0x0 };
+
+/* Expected results for vcvt with rounding.  */
+VECT_VAR_DECL(expected_rounding,int,32,2) [] = { 0xa, 0xa };
+VECT_VAR_DECL(expected_rounding,uint,32,2) [] = { 0xa, 0xa };
+VECT_VAR_DECL(expected_rounding,int,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d };
+VECT_VAR_DECL(expected_rounding,uint,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d };
+
+/* Expected results for vcvt_n with rounding.  */
+VECT_VAR_DECL(expected_vcvt_n_rounding,int,32,2) [] = { 0xa66666, 0xa66666 };
+VECT_VAR_DECL(expected_vcvt_n_rounding,uint,32,2) [] = { 0xa66666, 0xa66666 };
+VECT_VAR_DECL(expected_vcvt_n_rounding,int,32,4) [] = { 0xfbccc, 0xfbccc,
+					       0xfbccc, 0xfbccc };
+VECT_VAR_DECL(expected_vcvt_n_rounding,uint,32,4) [] = { 0xfbccc, 0xfbccc,
+						0xfbccc, 0xfbccc };
+
+/* Expected results for vcvt_n with saturation.  */
+VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,2) [] = { 0x7fffffff,
+							  0x7fffffff };
+VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,4) [] = { 0x7fffffff,
+							  0x7fffffff,
+					       0x7fffffff, 0x7fffffff };
+
+#define TEST_MSG "VCVT/VCVTQ"
+void exec_vcvt (void)
+{
+  int i;
+
+  /* Basic test: y=vcvt(x), then store the result.  */
+#define TEST_VCVT(Q, T1, T2, W, N, TS1, TS2, EXP)		\
+  VECT_VAR(vector_res, T1, W, N) =				\
+    vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N));	\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),			\
+		    VECT_VAR(vector_res, T1, W, N));		\
+  CHECK(TEST_MSG, T1, W, N, PRIx##W, EXP, TEST_MSG2);
+
+#define TEST_VCVT_FP(Q, T1, T2, W, N, TS1, TS2, EXP)		\
+  VECT_VAR(vector_res, T1, W, N) =				\
+    vcvt##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N));	\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),			\
+		    VECT_VAR(vector_res, T1, W, N));		\
+  CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXP, TEST_MSG2);
+
+#define TEST_VCVT_N(Q, T1, T2, W, N, TS1, TS2, V, EXP)			\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V);	\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N));			\
+  CHECK(TEST_MSG, T1, W, N, PRIx##W, EXP, TEST_MSG2);
+
+#define TEST_VCVT_N_FP(Q, T1, T2, W, N, TS1, TS2, V, EXP)		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V);	\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N));			\
+  CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXP, TEST_MSG2);
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Make sure some elements have a fractional part, to exercise
+     integer conversions.  */
+  VSET_LANE(vector, , float, f, 32, 2, 0, -15.3f);
+  VSET_LANE(vector, , float, f, 32, 2, 1, 5.3f);
+  VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3f);
+  VSET_LANE(vector, q, float, f, 32, 4, 3, 5.3f);
+
+  /* The same result buffers are used multiple times, so we check them
+     before overwriting them.  */
+#define TEST_MSG2 ""
+
+  /* vcvt_f32_xx.  */
+  TEST_VCVT_FP(, float, f, 32, 2, int, s, expected_s);
+  TEST_VCVT_FP(, float, f, 32, 2, uint, u, expected_u);
+
+  /* vcvtq_f32_xx.  */
+  TEST_VCVT_FP(q, float, f, 32, 4, int, s, expected_s);
+  TEST_VCVT_FP(q, float, f, 32, 4, uint, u, expected_u);
+
+  /* vcvt_xx_f32.  */
+  TEST_VCVT(, int, s, 32, 2, float, f, expected);
+  TEST_VCVT(, uint, u, 32, 2, float, f, expected);
+
+  VSET_LANE(vector, q, float, f, 32, 4, 0, 0.0f);
+  VSET_LANE(vector, q, float, f, 32, 4, 1, -0.0f);
+  VSET_LANE(vector, q, float, f, 32, 4, 2, 15.12f);
+  VSET_LANE(vector, q, float, f, 32, 4, 3, -15.12f);
+
+  /* vcvtq_xx_f32.  */
+  TEST_VCVT(q, int, s, 32, 4, float, f, expected);
+  TEST_VCVT(q, uint, u, 32, 4, float, f, expected);
+
+  /* The same result buffers are used multiple times, so we check them
+     before overwriting them.  */
+#undef TEST_MSG
+#define TEST_MSG "VCVT_N/VCVTQ_N"
+
+  /* vcvt_n_f32_xx.  */
+  TEST_VCVT_N_FP(, float, f, 32, 2, int, s, 2, expected_vcvt_n_s);
+  TEST_VCVT_N_FP(, float, f, 32, 2, uint, u, 7, expected_vcvt_n_u);
+
+  /* vcvtq_n_f32_xx.  */
+  TEST_VCVT_N_FP(q, float, f, 32, 4, int, s, 30, expected_vcvt_n_s);
+  TEST_VCVT_N_FP(q, float, f, 32, 4, uint, u, 12, expected_vcvt_n_u);
+
+  /* vcvt_n_xx_f32.  */
+  TEST_VCVT_N(, int, s, 32, 2, float, f, 20, expected_vcvt_n);
+  TEST_VCVT_N(, uint, u, 32, 2, float, f, 2, expected_vcvt_n);
+
+  /* vcvtq_n_xx_f32.  */
+  TEST_VCVT_N(q, int, s, 32, 4, float, f, 13, expected_vcvt_n);
+  TEST_VCVT_N(q, uint, u, 32, 4, float, f, 1, expected_vcvt_n);
+
+  /* Check rounding.  */
+#undef TEST_MSG
+#define TEST_MSG "VCVT/VCVTQ"
+#undef TEST_MSG2
+#define TEST_MSG2 "(check rounding)"
+  VDUP(vector, , float, f, 32, 2, 10.4f);
+  VDUP(vector, q, float, f, 32, 4, 125.9f);
+  /* vcvt_xx_f32.  */
+  TEST_VCVT(, int, s, 32, 2, float, f, expected_rounding);
+  TEST_VCVT(, uint, u, 32, 2, float, f, expected_rounding);
+  /* vcvtq_xx_f32.  */
+  TEST_VCVT(q, int, s, 32, 4, float, f, expected_rounding);
+  TEST_VCVT(q, uint, u, 32, 4, float, f, expected_rounding);
+
+#undef TEST_MSG
+#define TEST_MSG "VCVT_N/VCVTQ_N"
+  /* vcvt_n_xx_f32.  */
+  TEST_VCVT_N(, int, s, 32, 2, float, f, 20, expected_vcvt_n_rounding);
+  TEST_VCVT_N(, uint, u, 32, 2, float, f, 20, expected_vcvt_n_rounding);
+  /* vcvtq_n_xx_f32.  */
+  TEST_VCVT_N(q, int, s, 32, 4, float, f, 13, expected_vcvt_n_rounding);
+  TEST_VCVT_N(q, uint, u, 32, 4, float, f, 13, expected_vcvt_n_rounding);
+
+#undef TEST_MSG
+#define TEST_MSG "VCVT_N/VCVTQ_N"
+#undef TEST_MSG2
+#define TEST_MSG2 "(check saturation)"
+  /* vcvt_n_xx_f32.  */
+  TEST_VCVT_N(, int, s, 32, 2, float, f, 31, expected_vcvt_n_saturation);
+  /* vcvtq_n_xx_f32.  */
+  TEST_VCVT_N(q, int, s, 32, 4, float, f, 31, expected_vcvt_n_saturation);
+}
+
+int main (void)
+{
+  exec_vcvt ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_op.inc
@@ -0,0 +1,72 @@
+/* Template file for unary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other unary
+   operators, these can be tested by providing a definition for
+   EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=OP(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)				\
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)					\
+
+  /* No need for 64 bits variants in the general case.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME)();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c
@@ -0,0 +1,76 @@
+#define INSN_NAME vcge
+#define TEST_MSG "VCGE/VCGEQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0x0,
+					 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff,
+					     0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						0xff, 0xff, 0xff, 0xff,
+						0xff, 0xff, 0xff, 0xff,
+						0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						0, 0x0, 0xffff, 0xffff };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0xffffffff, 0xffffffff };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorn.c
@@ -0,0 +1,48 @@
+#define INSN_NAME vorn
+#define TEST_MSG "VORN/VORNQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xfd, 0xfd, 0xff, 0xff,
+				       0xfd, 0xfd, 0xff, 0xff };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffffc, 0xfffffffd };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffb };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb,
+					0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff7, 0xfffffff7 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffffd };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf9, 0xf9, 0xfb, 0xfb,
+					0xfd, 0xfd, 0xff, 0xff,
+					0xf9, 0xf9, 0xfb, 0xfb,
+					0xfd, 0xfd, 0xff, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+					0xfff7, 0xfff7, 0xfff7, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffd, 0xfffffffd,
+					0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff7,
+					0xfffffffffffffff7 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					 0xf7, 0xf7, 0xf7, 0xf7,
+					 0xfb, 0xfb, 0xfb, 0xfb,
+					 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff,
+					 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
+					 0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffffc,
+					 0xfffffffffffffffd };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc
@@ -0,0 +1,67 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=OP(x), then store the result.  */
+#define TEST_VPXXX1(INSN, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##_##T2##W(VECT_VAR(vector, T1, W, N),				\
+		   VECT_VAR(vector, T1, W, N));				\
+  vst1##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		 VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VPXXX(INSN, T1, T2, W, N)		\
+  TEST_VPXXX1(INSN, T1, T2, W, N)		\
+
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+
+  /* Apply a binary operator named INSN_NAME.  */
+  TEST_VPXXX(INSN_NAME, int, s, 8, 8);
+  TEST_VPXXX(INSN_NAME, int, s, 16, 4);
+  TEST_VPXXX(INSN_NAME, int, s, 32, 2);
+  TEST_VPXXX(INSN_NAME, uint, u, 8, 8);
+  TEST_VPXXX(INSN_NAME, uint, u, 16, 4);
+  TEST_VPXXX(INSN_NAME, uint, u, 32, 2);
+  TEST_VPXXX(INSN_NAME, float, f, 32, 2);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
@@ -0,0 +1,671 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+
+/* vld2_dup/chunk 0.  */
+VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+				       0xf0, 0xf1, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+					0xf0, 0xf1, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+					0xf0, 0xf1, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* vld2_dup/chunk 1.  */
+VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+					      0xf0, 0xf1, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+					       0xf0, 0xf1, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+					       0xf0, 0xf1, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
+						0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld3_dup/chunk 0.  */
+VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
+					      0xf1, 0xf2, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xfff0, 0xfff1,
+					       0xfff2, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
+					       0xf1, 0xf2, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xfff0, 0xfff1,
+						0xfff2, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
+					       0xf1, 0xf2, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
+						0xfff2, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld3_dup/chunk 1.  */
+VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
+					      0xf0, 0xf1, 0xf2, 0xf0 };
+VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xfff1, 0xfff2,
+					       0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xfffffff0 };
+VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
+					       0xf0, 0xf1, 0xf2, 0xf0 };
+VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xfff1, 0xfff2,
+						0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff0 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
+					       0xf0, 0xf1, 0xf2, 0xf0 };
+VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
+						0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
+VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld3_dup/chunk 2.  */
+VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
+					      0xf2, 0xf0, 0xf1, 0xf2 };
+VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff2, 0xfff0,
+					       0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
+					       0xf2, 0xf0, 0xf1, 0xf2 };
+VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xfff2, 0xfff0,
+						0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
+					       0xf2, 0xf0, 0xf1, 0xf2 };
+VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
+						0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
+VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld4_dup/chunk 0.  */
+VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xfff0, 0xfff1,
+					       0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld4_dup/chunk 1.  */
+VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld4_dup/chunk 2.  */
+VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+/* vld4_dup/chunk3.  */
+VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					       0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0x33333333, 0x33333333,
+					       0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0x33333333, 0x33333333,
+						0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+void exec_vldX_dup (void)
+{
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLDX_DUP(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X)				\
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
+    vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]);	\
+									\
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
+		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y)			\
+  memcpy(VECT_VAR(result, T1, W, N),			\
+	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+#define DECL_ALL_VLDX_DUP(X)			\
+  DECL_VLDX_DUP(int, 8, 8, X);			\
+  DECL_VLDX_DUP(int, 16, 4, X);			\
+  DECL_VLDX_DUP(int, 32, 2, X);			\
+  DECL_VLDX_DUP(int, 64, 1, X);			\
+  DECL_VLDX_DUP(uint, 8, 8, X);			\
+  DECL_VLDX_DUP(uint, 16, 4, X);		\
+  DECL_VLDX_DUP(uint, 32, 2, X);		\
+  DECL_VLDX_DUP(uint, 64, 1, X);		\
+  DECL_VLDX_DUP(poly, 8, 8, X);			\
+  DECL_VLDX_DUP(poly, 16, 4, X);		\
+  DECL_VLDX_DUP(float, 32, 2, X)
+
+#define TEST_ALL_VLDX_DUP(X)			\
+  TEST_VLDX_DUP(, int, s, 8, 8, X);		\
+  TEST_VLDX_DUP(, int, s, 16, 4, X);		\
+  TEST_VLDX_DUP(, int, s, 32, 2, X);		\
+  TEST_VLDX_DUP(, int, s, 64, 1, X);		\
+  TEST_VLDX_DUP(, uint, u, 8, 8, X);		\
+  TEST_VLDX_DUP(, uint, u, 16, 4, X);		\
+  TEST_VLDX_DUP(, uint, u, 32, 2, X);		\
+  TEST_VLDX_DUP(, uint, u, 64, 1, X);		\
+  TEST_VLDX_DUP(, poly, p, 8, 8, X);		\
+  TEST_VLDX_DUP(, poly, p, 16, 4, X);		\
+  TEST_VLDX_DUP(, float, f, 32, 2, X)
+
+#define TEST_ALL_EXTRA_CHUNKS(X, Y)		\
+  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 64, 1, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
+
+
+  DECL_ALL_VLDX_DUP(2);
+  DECL_ALL_VLDX_DUP(3);
+  DECL_ALL_VLDX_DUP(4);
+
+  /* Special input buffers of suitable size are needed for vld2/vld3/vld4.  */
+  /* Input buffers for vld2, 1 of each size */
+  VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8);
+  PAD(buffer_vld2_pad, int, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4);
+  PAD(buffer_vld2_pad, int, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2);
+  PAD(buffer_vld2_pad, int, 32, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1);
+  PAD(buffer_vld2_pad, int, 64, 1);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8);
+  PAD(buffer_vld2_pad, uint, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4);
+  PAD(buffer_vld2_pad, uint, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2);
+  PAD(buffer_vld2_pad, uint, 32, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1);
+  PAD(buffer_vld2_pad, uint, 64, 1);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8);
+  PAD(buffer_vld2_pad, poly, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
+  PAD(buffer_vld2_pad, poly, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
+  PAD(buffer_vld2_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16);
+  PAD(buffer_vld2_pad, int, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8);
+  PAD(buffer_vld2_pad, int, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4);
+  PAD(buffer_vld2_pad, int, 32, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2);
+  PAD(buffer_vld2_pad, int, 64, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16);
+  PAD(buffer_vld2_pad, uint, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8);
+  PAD(buffer_vld2_pad, uint, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4);
+  PAD(buffer_vld2_pad, uint, 32, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2);
+  PAD(buffer_vld2_pad, uint, 64, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16);
+  PAD(buffer_vld2_pad, poly, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
+  PAD(buffer_vld2_pad, poly, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
+  PAD(buffer_vld2_pad, float, 32, 4);
+
+  /* Input buffers for vld3, 1 of each size */
+  VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8);
+  PAD(buffer_vld3_pad, int, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4);
+  PAD(buffer_vld3_pad, int, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2);
+  PAD(buffer_vld3_pad, int, 32, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1);
+  PAD(buffer_vld3_pad, int, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8);
+  PAD(buffer_vld3_pad, uint, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4);
+  PAD(buffer_vld3_pad, uint, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2);
+  PAD(buffer_vld3_pad, uint, 32, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1);
+  PAD(buffer_vld3_pad, uint, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8);
+  PAD(buffer_vld3_pad, poly, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
+  PAD(buffer_vld3_pad, poly, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
+  PAD(buffer_vld3_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16);
+  PAD(buffer_vld3_pad, int, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8);
+  PAD(buffer_vld3_pad, int, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4);
+  PAD(buffer_vld3_pad, int, 32, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2);
+  PAD(buffer_vld3_pad, int, 64, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16);
+  PAD(buffer_vld3_pad, uint, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8);
+  PAD(buffer_vld3_pad, uint, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4);
+  PAD(buffer_vld3_pad, uint, 32, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2);
+  PAD(buffer_vld3_pad, uint, 64, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16);
+  PAD(buffer_vld3_pad, poly, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
+  PAD(buffer_vld3_pad, poly, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
+  PAD(buffer_vld3_pad, float, 32, 4);
+
+  /* Input buffers for vld4, 1 of each size */
+  VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8);
+  PAD(buffer_vld4_pad, int, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4);
+  PAD(buffer_vld4_pad, int, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2);
+  PAD(buffer_vld4_pad, int, 32, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1);
+  PAD(buffer_vld4_pad, int, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8);
+  PAD(buffer_vld4_pad, uint, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4);
+  PAD(buffer_vld4_pad, uint, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2);
+  PAD(buffer_vld4_pad, uint, 32, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1);
+  PAD(buffer_vld4_pad, uint, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8);
+  PAD(buffer_vld4_pad, poly, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
+  PAD(buffer_vld4_pad, poly, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
+  PAD(buffer_vld4_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16);
+  PAD(buffer_vld4_pad, int, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8);
+  PAD(buffer_vld4_pad, int, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4);
+  PAD(buffer_vld4_pad, int, 32, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2);
+  PAD(buffer_vld4_pad, int, 64, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16);
+  PAD(buffer_vld4_pad, uint, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8);
+  PAD(buffer_vld4_pad, uint, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4);
+  PAD(buffer_vld4_pad, uint, 32, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2);
+  PAD(buffer_vld4_pad, uint, 64, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16);
+  PAD(buffer_vld4_pad, poly, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
+  PAD(buffer_vld4_pad, poly, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
+  PAD(buffer_vld4_pad, float, 32, 4);
+
+  /* Check vld2_dup/vld2q_dup.  */
+  clean_results ();
+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
+  TEST_ALL_VLDX_DUP(2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1");
+
+  /* Check vld3_dup/vld3q_dup.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
+  TEST_ALL_VLDX_DUP(3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2");
+
+  /* Check vld4_dup/vld4q_dup */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
+  TEST_ALL_VLDX_DUP(4);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3");
+}
+
+int main (void)
+{
+  exec_vldX_dup ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c
@@ -0,0 +1,52 @@
+#define INSN_NAME vcage
+#define TEST_MSG "VCAGE/VCAGEQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+					 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+					  0xffffffff, 0xffffffff };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
@@ -0,0 +1,86 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#define TEST_MSG "VGET_LOW"
+void exec_vget_low (void)
+{
+  /* Basic test: vec64=vget_low(vec128), then store the result.  */
+#define TEST_VGET_LOW(T1, T2, W, N, N2)					\
+  VECT_VAR(vector64, T1, W, N) =					\
+    vget_low_##T2##W(VECT_VAR(vector128, T1, W, N2));			\
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N))
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector128);
+
+  TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+  VLOAD(vector128, buffer, q, float, f, 32, 4);
+
+  clean_results ();
+
+  /* Execute the tests.  */
+  TEST_VGET_LOW(int, s, 8, 8, 16);
+  TEST_VGET_LOW(int, s, 16, 4, 8);
+  TEST_VGET_LOW(int, s, 32, 2, 4);
+  TEST_VGET_LOW(int, s, 64, 1, 2);
+  TEST_VGET_LOW(uint, u, 8, 8, 16);
+  TEST_VGET_LOW(uint, u, 16, 4, 8);
+  TEST_VGET_LOW(uint, u, 32, 2, 4);
+  TEST_VGET_LOW(uint, u, 64, 1, 2);
+  TEST_VGET_LOW(poly, p, 8, 8, 16);
+  TEST_VGET_LOW(poly, p, 16, 4, 8);
+  TEST_VGET_LOW(float, f, 32, 2, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vget_low ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c
@@ -0,0 +1,137 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf, 0xe, 0xd, 0xc,
+				       0xb, 0xa, 0x9, 0x8 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xf, 0xe, 0xd, 0xc };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xf, 0xe };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf, 0xe, 0xd, 0xc,
+					0xb, 0xa, 0x9, 0x8 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xf, 0xe, 0xd, 0xc };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xf, 0xe };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf, 0xe, 0xd, 0xc,
+					0xb, 0xa, 0x9, 0x8 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf, 0xe, 0xd, 0xc,
+					0xb, 0xa, 0x9, 0x8,
+					0x7, 0x6, 0x5, 0x4,
+					0x3, 0x2, 0x1, 0x0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xf, 0xe, 0xd, 0xc,
+					0xb, 0xa, 0x9, 0x8 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xf, 0xe, 0xd, 0xc };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf, 0xe, 0xd, 0xc,
+					 0xb, 0xa, 0x9, 0x8,
+					 0x7, 0x6, 0x5, 0x4,
+					 0x3, 0x2, 0x1, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf, 0xe, 0xd, 0xc,
+					 0xb, 0xa, 0x9, 0x8 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xf, 0xe, 0xd, 0xc };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf, 0xe, 0xd, 0xc,
+					 0xb, 0xa, 0x9, 0x8,
+					 0x7, 0x6, 0x5, 0x4,
+					 0x3, 0x2, 0x1, 0x0 };
+
+#define INSN_NAME vmvn
+#define TEST_MSG "VMVN/VMVNQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=OP(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)				\
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)					\
+
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, poly, 8, 8);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, poly, 8, 16);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, poly, 8, 8);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, poly, 8, 16);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , poly, p, 8, 8);
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 8, 16);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, poly, p, 8, 16);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, , poly, p, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4);
+  TEST_UNARY_OP(INSN_NAME, q, poly, p, 8, 16);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, "");
+}
+
+int main (void)
+{
+  exec_vmvn ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vorr.c
@@ -0,0 +1,48 @@
+#define INSN_NAME vorr
+#define TEST_MSG "VORR/VORRQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf2, 0xf3,
+				       0xf6, 0xf7, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff4 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf4, 0xf5, 0xf6, 0xf7,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfffe, 0xffff, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff8, 0xfffffff9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf6, 0xf7, 0xf6, 0xf7,
+					0xf6, 0xf7, 0xf6, 0xf7,
+					0xfe, 0xff, 0xfe, 0xff,
+					0xfe, 0xff, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff,
+					0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff2, 0xfffffff3,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff8,
+					0xfffffffffffffff9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+					 0xfc, 0xfd, 0xfe, 0xff,
+					 0xfc, 0xfd, 0xfe, 0xff,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+					 0xfff7, 0xfff7, 0xfff7, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff7, 0xfffffff7,
+					 0xfffffff7, 0xfffffff7 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3,
+					 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c
@@ -0,0 +1,93 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results splitted in several chunks.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0x11, 0x11,
+					0xf2, 0xf3, 0x11, 0x11 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1, 0x22, 0x22 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
+					 0xf2, 0xf3, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55,
+					 0xf2, 0xf3, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11,
+					 0xf2, 0xf3, 0x11, 0x11,
+					 0xf4, 0xf5, 0x11, 0x11,
+					 0xf6, 0xf7, 0x11, 0x11 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1, 0x22, 0x22,
+					 0xfff2, 0xfff3, 0x22, 0x22 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x33, 0x33 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55,
+					  0xf2, 0xf3, 0x55, 0x55,
+					  0xf4, 0xf5, 0x55, 0x55,
+					  0xf6, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
+					  0xfff2, 0xfff3, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, 0x77, 0x77 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333,
+					  0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55,
+					  0xf2, 0xf3, 0x55, 0x55,
+					  0xf4, 0xf5, 0x55, 0x55,
+					  0xf6, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66,
+					  0xfff2, 0xfff3, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					    0x42073333, 0x42073333 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf4, 0xf5, 0x11, 0x11,
+					0xf6, 0xf7, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff2, 0xfff3, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
+					 0xf6, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55,
+					 0xf6, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11,
+					 0xfa, 0xfb, 0x11, 0x11,
+					 0xfc, 0xfd, 0x11, 0x11,
+					 0xfe, 0xff, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff4, 0xfff5, 0x22, 0x22,
+					 0xfff6, 0xfff7, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55,
+					  0xfa, 0xfb, 0x55, 0x55,
+					  0xfc, 0xfd, 0x55, 0x55,
+					  0xfe, 0xff, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
+					  0xfff6, 0xfff7, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff2, 0xfffffff3, 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333,
+					  0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55,
+					  0xfa, 0xfb, 0x55, 0x55,
+					  0xfc, 0xfd, 0x55, 0x55,
+					  0xfe, 0xff, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66,
+					  0xfff6, 0xfff7, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000,
+					    0x42073333, 0x42073333 };
+
+#define INSN_NAME vtrn
+#define TEST_MSG "VTRN/VTRNQ"
+
+#include "vshuffle.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc
@@ -0,0 +1,89 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = OP(vector, vector3, vector4),
+     then store the result.  */
+#define TEST_VMLXL1(INSN, T1, T2, W, W2, N)				\
+  VECT_VAR(vector_res, T1, W, N) =                                      \
+    INSN##_##T2##W2(VECT_VAR(vector, T1, W, N),                         \
+                    VECT_VAR(vector3, T1, W2, N),                       \
+                    VECT_VAR(vector4, T1, W2, N));                      \
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLXL(INSN, T1, T2, W, W2, N)	\
+  TEST_VMLXL1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector3, int, 8, 8);
+  DECL_VARIABLE(vector4, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector3, int, 16, 4);
+  DECL_VARIABLE(vector4, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector3, int, 32, 2);
+  DECL_VARIABLE(vector4, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector3, uint, 8, 8);
+  DECL_VARIABLE(vector4, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector3, uint, 16, 4);
+  DECL_VARIABLE(vector4, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  DECL_VARIABLE(vector, uint, 64, 2);
+  DECL_VARIABLE(vector3, uint, 32, 2);
+  DECL_VARIABLE(vector4, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  VDUP(vector3, , int, s, 8, 8, 0x55);
+  VDUP(vector4, , int, s, 8, 8, 0xBB);
+  VDUP(vector3, , int, s, 16, 4, 0x55);
+  VDUP(vector4, , int, s, 16, 4, 0xBB);
+  VDUP(vector3, , int, s, 32, 2, 0x55);
+  VDUP(vector4, , int, s, 32, 2, 0xBB);
+  VDUP(vector3, , uint, u, 8, 8, 0x55);
+  VDUP(vector4, , uint, u, 8, 8, 0xBB);
+  VDUP(vector3, , uint, u, 16, 4, 0x55);
+  VDUP(vector4, , uint, u, 16, 4, 0xBB);
+  VDUP(vector3, , uint, u, 32, 2, 0x55);
+  VDUP(vector4, , uint, u, 32, 2, 0xBB);
+
+  TEST_VMLXL(INSN_NAME, int, s, 16, 8, 8);
+  TEST_VMLXL(INSN_NAME, int, s, 32, 16, 4);
+  TEST_VMLXL(INSN_NAME, int, s, 64, 32, 2);
+  TEST_VMLXL(INSN_NAME, uint, u, 16, 8, 8);
+  TEST_VMLXL(INSN_NAME, uint, u, 32, 16, 4);
+  TEST_VMLXL(INSN_NAME, uint, u, 64, 32, 2);
+
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubl.c
@@ -0,0 +1,48 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vsubl
+#define TEST_MSG "VSUBL"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0,
+					0x1, 0x2, 0x3, 0x4 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0,
+					 0x1, 0x2, 0x3, 0x4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x1, 0x2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "vXXXl.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc
@@ -0,0 +1,70 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vaddw(x1,x2), then store the result.  */
+#define TEST_VADDW1(INSN, T1, T2, W, W2, N)				\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    INSN##_##T2##W(VECT_VAR(vector, T1, W2, N),				\
+		   VECT_VAR(vector2, T1, W, N));			\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define TEST_VADDW(INSN, T1, T2, W, W2, N)	\
+  TEST_VADDW1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, uint, 64, 2);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+
+  /* Execute the tests.  */
+  TEST_VADDW(INSN_NAME, int, s, 8, 16, 8);
+  TEST_VADDW(INSN_NAME, int, s, 16, 32, 4);
+  TEST_VADDW(INSN_NAME, int, s, 32, 64, 2);
+  TEST_VADDW(INSN_NAME, uint, u, 8, 16, 8);
+  TEST_VADDW(INSN_NAME, uint, u, 16, 32, 4);
+  TEST_VADDW(INSN_NAME, uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c
@@ -0,0 +1,29 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vqdmlsl_n
+#define TEST_MSG "VQDMLSL_N"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffe95c, 0xffffe95d,
+					0xffffe95e, 0xffffe95f };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffde12,
+					0xffffffffffffde13 };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,64,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x80000000, 0x80000000,
+					 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x8000000000000000,
+					 0x8000000000000000 };
+
+#include "vqdmlXl_n.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c
@@ -0,0 +1,75 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x100, 0xe1, 0xc4, 0xa9,
+					0x90, 0x79, 0x64, 0x51 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x100, 0xe1, 0xc4, 0xa9 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x100, 0xe1 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe100, 0xe2e1, 0xe4c4, 0xe6a9,
+					 0xe890, 0xea79, 0xec64, 0xee51 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe00100, 0xffe200e1,
+					 0xffe400c4, 0xffe600a9 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe000000100,
+					 0xffffffe2000000e1 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x5500, 0x5501, 0x5504, 0x5505,
+					 0x5510, 0x5511, 0x5514, 0x5515 };
+
+#define TEST_MSG "VMULL"
+void exec_vmull (void)
+{
+  /* Basic test: y=vmull(x,x), then store the result.  */
+#define TEST_VMULL(T1, T2, W, W2, N)					\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    vmull_##T2##W(VECT_VAR(vector, T1, W, N),				\
+		  VECT_VAR(vector, T1, W, N));				\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, poly, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+  DECL_VARIABLE(vector_res, poly, 16, 8);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , poly, p, 8, 8);
+
+  TEST_VMULL(int, s, 8, 16, 8);
+  TEST_VMULL(int, s, 16, 32, 4);
+  TEST_VMULL(int, s, 32, 64, 2);
+  TEST_VMULL(uint, u, 8, 16, 8);
+  TEST_VMULL(uint, u, 16, 32, 4);
+  TEST_VMULL(uint, u, 32, 64, 2);
+  TEST_VMULL(poly, p, 8, 16, 8);
+
+  CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, "");
+}
+
+int main (void)
+{
+  exec_vmull ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c
@@ -0,0 +1,27 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vqdmlal
+#define TEST_MSG "VQDMLAL"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x7c1e, 0x7c1f, 0x7c20, 0x7c21 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x7c1e, 0x7c1f };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,64,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffef, 0x7ffffff0,
+					 0x7ffffff1, 0x7ffffff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fffffffffffffef,
+					 0x7ffffffffffffff0 };
+
+#include "vqdmlXl.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c
@@ -0,0 +1,96 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffbc0, 0xfffffc04 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b26666, 0xc3a74000 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf,
+					0xfc04, 0xfc59, 0xfcae, 0xfd03 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff9a0, 0xfffffa06,
+					0xfffffa6c, 0xfffffad2 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5,
+					 0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff780, 0xfffff808,
+					 0xfffff890, 0xfffff918 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000,
+					   0xc49b9333, 0xc4907667 };
+
+#define INSN_NAME vmul_n
+#define TEST_MSG "VMUL_N"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMUL(VAR)				\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, float, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4);		\
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+  /* vector_res = vmul_n(vector,val), then store the result.  */
+#define TEST_VMUL_N(Q, T1, T2, W, N, L)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vmul##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VMUL(vector);
+  DECL_VMUL(vector_res);
+
+  clean_results ();
+
+  /* Initialize vector from pre-initialized values.  */
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose multiplier arbitrarily.  */
+  TEST_VMUL_N(, int, s, 16, 4, 0x11);
+  TEST_VMUL_N(, int, s, 32, 2, 0x22);
+  TEST_VMUL_N(, uint, u, 16, 4, 0x33);
+  TEST_VMUL_N(, uint, u, 32, 2, 0x44);
+  TEST_VMUL_N(, float, f, 32, 2, 22.3f);
+  TEST_VMUL_N(q, int, s, 16, 8, 0x55);
+  TEST_VMUL_N(q, int, s, 32, 4, 0x66);
+  TEST_VMUL_N(q, uint, u, 16, 8, 0x77);
+  TEST_VMUL_N(q, uint, u, 32, 4, 0x88);
+  TEST_VMUL_N(q, float, f, 32, 4, 88.9f);
+
+  CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
@@ -0,0 +1,253 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* We test vdup and vmov in the same place since they are aliases.  */
+
+/* Expected results.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					  0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					  0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0xfffffffffffffff0,
+					  0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					  0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
+					    0xc1800000, 0xc1800000 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0xfffffffffffffff1,
+					 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					  0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					  0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0xfffffffffffffff1,
+					  0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					  0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
+					    0xc1700000, 0xc1700000 };
+
+/* Chunk 2.  */
+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+					 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0xfffffff2,
+					 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff2,
+					 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+					  0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0xfffffff2,
+					  0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff2,
+					  0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+					  0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
+					    0xc1600000, 0xc1600000 };
+
+#define TEST_MSG "VDUP/VDUPQ"
+void exec_vdup_vmov (void)
+{
+  int i;
+
+  /* Basic test: vec=vdup(x), then store the result.  */
+#undef TEST_VDUP
+#define TEST_VDUP(Q, T1, T2, W, N)					\
+  VECT_VAR(vector, T1, W, N) =						\
+    vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
+
+  /* Basic test: vec=vmov(x), then store the result.  */
+#define TEST_VMOV(Q, T1, T2, W, N)					\
+  VECT_VAR(vector, T1, W, N) =						\
+    vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i< 3; i++) {
+    clean_results ();
+
+    TEST_VDUP(, int, s, 8, 8);
+    TEST_VDUP(, int, s, 16, 4);
+    TEST_VDUP(, int, s, 32, 2);
+    TEST_VDUP(, int, s, 64, 1);
+    TEST_VDUP(, uint, u, 8, 8);
+    TEST_VDUP(, uint, u, 16, 4);
+    TEST_VDUP(, uint, u, 32, 2);
+    TEST_VDUP(, uint, u, 64, 1);
+    TEST_VDUP(, poly, p, 8, 8);
+    TEST_VDUP(, poly, p, 16, 4);
+    TEST_VDUP(, float, f, 32, 2);
+
+    TEST_VDUP(q, int, s, 8, 16);
+    TEST_VDUP(q, int, s, 16, 8);
+    TEST_VDUP(q, int, s, 32, 4);
+    TEST_VDUP(q, int, s, 64, 2);
+    TEST_VDUP(q, uint, u, 8, 16);
+    TEST_VDUP(q, uint, u, 16, 8);
+    TEST_VDUP(q, uint, u, 32, 4);
+    TEST_VDUP(q, uint, u, 64, 2);
+    TEST_VDUP(q, poly, p, 8, 16);
+    TEST_VDUP(q, poly, p, 16, 8);
+    TEST_VDUP(q, float, f, 32, 4);
+
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+
+  /* Do the same tests with vmov. Use the same expected results.  */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+  for (i=0; i< 3; i++) {
+    clean_results ();
+
+    TEST_VMOV(, int, s, 8, 8);
+    TEST_VMOV(, int, s, 16, 4);
+    TEST_VMOV(, int, s, 32, 2);
+    TEST_VMOV(, int, s, 64, 1);
+    TEST_VMOV(, uint, u, 8, 8);
+    TEST_VMOV(, uint, u, 16, 4);
+    TEST_VMOV(, uint, u, 32, 2);
+    TEST_VMOV(, uint, u, 64, 1);
+    TEST_VMOV(, poly, p, 8, 8);
+    TEST_VMOV(, poly, p, 16, 4);
+    TEST_VMOV(, float, f, 32, 2);
+
+    TEST_VMOV(q, int, s, 8, 16);
+    TEST_VMOV(q, int, s, 16, 8);
+    TEST_VMOV(q, int, s, 32, 4);
+    TEST_VMOV(q, int, s, 64, 2);
+    TEST_VMOV(q, uint, u, 8, 16);
+    TEST_VMOV(q, uint, u, 16, 8);
+    TEST_VMOV(q, uint, u, 32, 4);
+    TEST_VMOV(q, uint, u, 64, 2);
+    TEST_VMOV(q, poly, p, 8, 16);
+    TEST_VMOV(q, poly, p, 16, 8);
+    TEST_VMOV(q, float, f, 32, 4);
+
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+}
+
+int main (void)
+{
+  exec_vdup_vmov ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c
@@ -0,0 +1,105 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results splitted in several chunks.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff1,
+					 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff1,
+					  0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0,
+					  0xfffffff1 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1,
+					  0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff1,
+					 0xfff2, 0xfff3,
+					 0xfff4, 0xfff5,
+					 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					  0xf4, 0xf5, 0xf6, 0xf7,
+					  0xf8, 0xf9, 0xfa, 0xfb,
+					  0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff1,
+					  0xfff2, 0xfff3,
+					  0xfff4, 0xfff5,
+					  0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					  0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333,
+					  0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					  0xf4, 0xf5, 0xf6, 0xf7,
+					  0xf8, 0xf9, 0xfa, 0xfb,
+					  0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1,
+					  0xfff2, 0xfff3,
+					  0xfff4, 0xfff5,
+					  0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					    0xc1600000, 0xc1500000 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0x11, 0x11, 0x11, 0x11,
+					0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0x22, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11,
+					 0x11, 0x11, 0x11, 0x11,
+					 0x11, 0x11, 0x11, 0x11,
+					 0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0x22, 0x22, 0x22, 0x22,
+					 0x22, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
+					  0x55, 0x55, 0x55, 0x55,
+					  0x55, 0x55, 0x55, 0x55,
+					  0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
+					  0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0x77, 0x77, 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333,
+					  0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55,
+					  0x55, 0x55, 0x55, 0x55,
+					  0x55, 0x55, 0x55, 0x55,
+					  0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66,
+					  0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333,
+					    0x42073333, 0x42073333 };
+
+#define INSN_NAME vuzp
+#define TEST_MSG "VUZP/VUZPQ"
+
+#include "vshuffle.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc
@@ -0,0 +1,224 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Additional expected results declaration, they are initialized in
+   each test file.  */
+extern ARRAY(expected_uint, uint, 8, 8);
+extern ARRAY(expected_uint, uint, 16, 4);
+extern ARRAY(expected_uint, uint, 32, 2);
+extern ARRAY(expected_q_uint, uint, 8, 16);
+extern ARRAY(expected_q_uint, uint, 16, 8);
+extern ARRAY(expected_q_uint, uint, 32, 4);
+extern ARRAY(expected_float, uint, 32, 2);
+extern ARRAY(expected_q_float, uint, 32, 4);
+extern ARRAY(expected_uint2, uint, 32, 2);
+extern ARRAY(expected_uint3, uint, 32, 2);
+extern ARRAY(expected_uint4, uint, 32, 2);
+extern ARRAY(expected_nan, uint, 32, 2);
+extern ARRAY(expected_mnan, uint, 32, 2);
+extern ARRAY(expected_nan2, uint, 32, 2);
+extern ARRAY(expected_inf, uint, 32, 2);
+extern ARRAY(expected_minf, uint, 32, 2);
+extern ARRAY(expected_inf2, uint, 32, 2);
+extern ARRAY(expected_mzero, uint, 32, 2);
+extern ARRAY(expected_p8, uint, 8, 8);
+extern ARRAY(expected_q_p8, uint, 8, 16);
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vcomp(x1,x2), then store the result.  */
+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)				\
+  VECT_VAR(vector_res, T3, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N))
+
+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N)				\
+  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+
+  /* No need for 64 bits elements.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, int, 8, 8);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 8, 8);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, int, 8, 16);
+  DECL_VARIABLE(vector2, int, 16, 8);
+  DECL_VARIABLE(vector2, int, 32, 4);
+  DECL_VARIABLE(vector2, uint, 8, 16);
+  DECL_VARIABLE(vector2, uint, 16, 8);
+  DECL_VARIABLE(vector2, uint, 32, 4);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  clean_results ();
+
+  /* There is no 64 bits variant, don't use the generic initializer.  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 8, 16);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily, will be used for vector
+     comparison.  */
+  VDUP(vector2, , int, s, 8, 8, -10);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xF3);
+  VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
+  VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1);
+  VDUP(vector2, , float, f, 32, 2, -15.0f);
+
+  VDUP(vector2, q, int, s, 8, 16, -4);
+  VDUP(vector2, q, int, s, 16, 8, -10);
+  VDUP(vector2, q, int, s, 32, 4, -14);
+  VDUP(vector2, q, uint, u, 8, 16, 0xF4);
+  VDUP(vector2, q, uint, u, 16, 8, 0xFFF6);
+  VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2);
+  VDUP(vector2, q, float, f, 32, 4, -14.0f);
+
+  /* The comparison operators produce only unsigned results, which
+     means that our tests with uint* inputs write their results in the
+     same vectors as the int* variants. As a consequence, we have to
+     execute and test the int* first, then the uint* ones.
+     Same thing for float and poly8.
+  */
+
+  /* Apply operator named INSN_NAME.  */
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 8, 8);
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 16, 4);
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2);
+  TEST_VCOMP(INSN_NAME, q, int, s, uint, 8, 16);
+  TEST_VCOMP(INSN_NAME, q, int, s, uint, 16, 8);
+  TEST_VCOMP(INSN_NAME, q, int, s, uint, 32, 4);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+
+  /* Now the uint* variants.  */
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 8, 8);
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 16, 4);
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2);
+  TEST_VCOMP(INSN_NAME, q, uint, u, uint, 8, 16);
+  TEST_VCOMP(INSN_NAME, q, uint, u, uint, 16, 8);
+  TEST_VCOMP(INSN_NAME, q, uint, u, uint, 32, 4);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_uint, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_uint, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_q_uint, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_q_uint, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_uint, "");
+
+  /* The float variants.  */
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_float, "");
+
+  TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_float, "");
+
+  /* Some "special" input values to test some corner cases.  */
+  /* Extra tests to have 100% coverage on all the variants.  */
+  VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
+  TEST_VCOMP(INSN_NAME, , uint, u, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint2, "uint 0xfffffff0");
+
+  VDUP(vector2, , int, s, 32, 2, -15);
+  TEST_VCOMP(INSN_NAME, , int, s, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint3, "int -15");
+
+  VDUP(vector2, , float, f, 32, 2, -16.0f);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_uint4, "float -16.0f");
+
+
+  /* Extra FP tests with special values (NaN, ....).  */
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, NAN);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_nan, "FP special (NaN)");
+
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, -NAN);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_mnan, " FP special (-NaN)");
+
+  VDUP(vector, , float, f, 32, 2, NAN);
+  VDUP(vector2, , float, f, 32, 2, 1.0);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_nan2, " FP special (NaN)");
+
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, HUGE_VALF);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_inf, " FP special (inf)");
+
+  VDUP(vector, , float, f, 32, 2, 1.0);
+  VDUP(vector2, , float, f, 32, 2, -HUGE_VALF);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_minf, " FP special (-inf)");
+
+  VDUP(vector, , float, f, 32, 2, HUGE_VALF);
+  VDUP(vector2, , float, f, 32, 2, 1.0);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_inf2, " FP special (inf)");
+
+  VDUP(vector, , float, f, 32, 2, -0.0);
+  VDUP(vector2, , float, f, 32, 2, 0.0);
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_mzero, " FP special (-0.0)");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaba.c
@@ -0,0 +1,142 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+				       0xfa, 0xfb, 0xfc, 0xfd };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
+					0x57, 0x58, 0x59, 0x5a };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
+					0x62, 0x63, 0x64, 0x65,
+					0x66, 0x67, 0x68, 0x69,
+					0x6a, 0x6b, 0x6c, 0x6d };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
+					0xba0, 0xba1, 0xba2, 0xba3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff,
+					 0x0, 0x1, 0x2, 0x3,
+					 0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
+					 0xfffd, 0xfffe, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#define TEST_MSG "VABA/VABAQ"
+void exec_vaba (void)
+{
+  /* Basic test: v4=vaba(v1,v2,v3), then store the result.  */
+#define TEST_VABA(Q, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N),			\
+		      VECT_VAR(vector3, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VABA_VAR(VAR)			\
+  DECL_VARIABLE(VAR, int, 8, 8);		\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 8, 8);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 8, 16);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 8, 16);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4)
+
+  DECL_VABA_VAR(vector1);
+  DECL_VABA_VAR(vector2);
+  DECL_VABA_VAR(vector3);
+  DECL_VABA_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+  VDUP(vector3, q, int, s, 8, 16, -100);
+  VDUP(vector3, q, int, s, 16, 8, -3000);
+  VDUP(vector3, q, int, s, 32, 4, 10000);
+  VDUP(vector3, q, uint, u, 8, 16, 2);
+  VDUP(vector3, q, uint, u, 16, 8, 3);
+  VDUP(vector3, q, uint, u, 32, 4, 4);
+
+  /* Execute the tests.  */
+  TEST_VABA(, int, s, 8, 8);
+  TEST_VABA(, int, s, 16, 4);
+  TEST_VABA(, int, s, 32, 2);
+  TEST_VABA(, uint, u, 8, 8);
+  TEST_VABA(, uint, u, 16, 4);
+  TEST_VABA(, uint, u, 32, 2);
+  TEST_VABA(q, int, s, 8, 16);
+  TEST_VABA(q, int, s, 16, 8);
+  TEST_VABA(q, int, s, 32, 4);
+  TEST_VABA(q, uint, u, 8, 16);
+  TEST_VABA(q, uint, u, 16, 8);
+  TEST_VABA(q, uint, u, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vaba ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c
@@ -0,0 +1,52 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmin
+#define TEST_MSG "VMIN/VMINQ"
+
+#define HAS_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf3, 0xf3, 0xf3, 0xf3 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf3, 0xf3, 0xf3, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf4, 0xf4, 0xf4 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xf9, 0xf9 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2,
+					 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					   0xc1680000, 0xc1680000 };
+/* Expected results with special FP values.  */
+VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+					       0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+						0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
+					       0x3f800000, 0x3f800000 };
+VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000,
+						0xff800000, 0xff800000 };
+VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000,
+						 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000,
+						 0x80000000, 0x80000000 };
+
+#include "binary_op_no64.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpaddl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpaddl.c
@@ -0,0 +1,116 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe1, 0xffffffe5 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1ffe1, 0x1ffe5 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x1ffffffe1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe1, 0xffe5, 0xffe9, 0xffed,
+					0xfff1, 0xfff5, 0xfff9, 0xfffd };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe1, 0xffffffe5,
+					0xffffffe9, 0xffffffed };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe1,
+					0xffffffffffffffe5 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e1, 0x1e5, 0x1e9, 0x1ed,
+					 0x1f1, 0x1f5, 0x1f9, 0x1fd };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe5, 0x1ffe9, 0x1ffed };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe1, 0x1ffffffe5 };
+
+#define INSN_NAME vpaddl
+#define TEST_MSG "VPADDL/VPADDLQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=OP(x), then store the result.  */
+#define TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2)	\
+  VECT_VAR(vector_res, T1, W2, N2) =			\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));	\
+  vst1##Q##_##T2##W2(VECT_VAR(result, T1, W2, N2),	\
+		    VECT_VAR(vector_res, T1, W2, N2))
+
+#define TEST_VPADDL(INSN, Q, T1, T2, W, N, W2, N2)	\
+  TEST_VPADDL1(INSN, Q, T1, T2, W, N, W2, N2)
+
+  /* No need for 64 bits elements variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 1);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 64, 1);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 8, 8);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 8, 16);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_VPADDL(INSN_NAME, , int, s, 8, 8, 16, 4);
+  TEST_VPADDL(INSN_NAME, , int, s, 16, 4, 32, 2);
+  TEST_VPADDL(INSN_NAME, , int, s, 32, 2, 64, 1);
+  TEST_VPADDL(INSN_NAME, , uint, u, 8, 8, 16, 4);
+  TEST_VPADDL(INSN_NAME, , uint, u, 16, 4, 32, 2);
+  TEST_VPADDL(INSN_NAME, , uint, u, 32, 2, 64, 1);
+  TEST_VPADDL(INSN_NAME, q, int, s, 8, 16, 16, 8);
+  TEST_VPADDL(INSN_NAME, q, int, s, 16, 8, 32, 4);
+  TEST_VPADDL(INSN_NAME, q, int, s, 32, 4, 64, 2);
+  TEST_VPADDL(INSN_NAME, q, uint, u, 8, 16, 16, 8);
+  TEST_VPADDL(INSN_NAME, q, uint, u, 16, 8, 32, 4);
+  TEST_VPADDL(INSN_NAME, q, uint, u, 32, 4, 64, 2);
+
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 1, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  exec_vpaddl ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c
@@ -0,0 +1,18 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmlsl_n
+#define TEST_MSG "VMLSL_N"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffa4b, 0xfffffa4c,
+					0xfffffa4d, 0xfffffa4e };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffff4a6,
+					0xfffffffffffff4a7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffef01, 0xffffef02,
+					 0xffffef03, 0xffffef04 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffef01,
+					 0xffffffffffffef02 };
+
+#include "vmlXl_n.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c
@@ -0,0 +1,14 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmlal_lane
+#define TEST_MSG "VMLAL_LANE"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 };
+
+#include "vmlXl_lane.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c
@@ -0,0 +1,51 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmax
+#define TEST_MSG "VMAX/VMAXQ"
+
+#define HAS_FLOAT_VARIANT
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4,
+					0xf4, 0xf5, 0xf6, 0xf7,
+					0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+					0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xf9, 0xf9,
+					 0xf9, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3,
+					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000,
+					   0xc1600000, 0xc1500000 };
+
+/* Expected results with special FP values.  */
+VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+					       0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
+						0x7fc00000, 0x7fc00000 };
+VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
+					       0x7f800000, 0x7f800000 };
+VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000,
+						0x3f800000, 0x3f800000 };
+VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+#include "binary_op_no64.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
@@ -0,0 +1,610 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+
+/* vld2/chunk 0.  */
+VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld2/chunk 1.  */
+VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xf0, 0xf1 };
+VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xfff0, 0xfff1, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld3/chunk 0.  */
+VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						0xfffffff2, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld3/chunk 1.  */
+VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xf0, 0xf1, 0xf2, 0xaa };
+VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xf0, 0xf1, 0xf2, 0xaa };
+VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					       0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						  0xc1800000, 0xc1700000 };
+
+/* vld3/chunk 2.  */
+VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xf0, 0xf1, 0xf2 };
+VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
+					       0xfff2, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff2, 0xaaaaaaaa,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0xfff1, 0xfff2, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1600000, 0xaaaaaaaa,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld4/chunk 0.  */
+VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld4/chunk 1.  */
+VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* vld4/chunk 2.  */
+VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					       0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+						  0xc1600000, 0xc1500000 };
+
+/* vld4/chunk 3.  */
+VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					      0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					       0xaa, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					       0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					       0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						0xaaaaaaaa, 0xaaaaaaaa };
+VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+						0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+						  0xaaaaaaaa, 0xaaaaaaaa };
+
+/* Declare additional input buffers as needed.  */
+/* Input buffers for vld2_lane */
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2);
+
+/* Input buffers for vld3_lane */
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3);
+
+/* Input buffers for vld4_lane */
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4);
+
+void exec_vldX_lane (void)
+{
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLDX_LANE(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+  /* We also use another extra input buffer (buffer_src), which we
+     fill with 0xAA, and which it used to load a vector from which we
+     read a given lane.  */
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)				\
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,				\
+	  sizeof(VECT_VAR(buffer_src, T1, W, N)));			\
+									\
+  VECT_ARRAY_VAR(vector_src, T1, W, N, X) =				\
+    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));		\
+									\
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
+    /* Use dedicated init buffer, of size.  X */			\
+    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X),	\
+			     VECT_ARRAY_VAR(vector_src, T1, W, N, X),	\
+			     L);					\
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
+		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)))
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y)		\
+  memcpy(VECT_VAR(result, T1, W, N),			\
+	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* We need all variants in 64 bits, but there is no 64x2 variant.  */
+#define DECL_ALL_VLDX_LANE(X)			\
+  DECL_VLDX_LANE(int, 8, 8, X);			\
+  DECL_VLDX_LANE(int, 16, 4, X);		\
+  DECL_VLDX_LANE(int, 32, 2, X);		\
+  DECL_VLDX_LANE(uint, 8, 8, X);		\
+  DECL_VLDX_LANE(uint, 16, 4, X);		\
+  DECL_VLDX_LANE(uint, 32, 2, X);		\
+  DECL_VLDX_LANE(poly, 8, 8, X);		\
+  DECL_VLDX_LANE(poly, 16, 4, X);		\
+  DECL_VLDX_LANE(int, 16, 8, X);		\
+  DECL_VLDX_LANE(int, 32, 4, X);		\
+  DECL_VLDX_LANE(uint, 16, 8, X);		\
+  DECL_VLDX_LANE(uint, 32, 4, X);		\
+  DECL_VLDX_LANE(poly, 16, 8, X);		\
+  DECL_VLDX_LANE(float, 32, 2, X);		\
+  DECL_VLDX_LANE(float, 32, 4, X)
+
+  /* Add some padding to try to catch out of bound accesses.  */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+  VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+  ARRAY1(V##_pad,T,W,N)
+
+  /* Use the same lanes regardless of the size of the array (X), for
+     simplicity.  */
+#define TEST_ALL_VLDX_LANE(X)			\
+  TEST_VLDX_LANE(, int, s, 8, 8, X, 7);		\
+  TEST_VLDX_LANE(, int, s, 16, 4, X, 2);	\
+  TEST_VLDX_LANE(, int, s, 32, 2, X, 0);	\
+  TEST_VLDX_LANE(, uint, u, 8, 8, X, 4);	\
+  TEST_VLDX_LANE(, uint, u, 16, 4, X, 3);	\
+  TEST_VLDX_LANE(, uint, u, 32, 2, X, 1);	\
+  TEST_VLDX_LANE(, poly, p, 8, 8, X, 4);	\
+  TEST_VLDX_LANE(, poly, p, 16, 4, X, 3);	\
+  TEST_VLDX_LANE(q, int, s, 16, 8, X, 6);	\
+  TEST_VLDX_LANE(q, int, s, 32, 4, X, 2);	\
+  TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5);	\
+  TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0);	\
+  TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5);	\
+  TEST_VLDX_LANE(, float, f, 32, 2, X, 0);	\
+  TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
+
+#define TEST_ALL_EXTRA_CHUNKS(X, Y)		\
+  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+
+  /* Declare the temporary buffers / variables.  */
+  DECL_ALL_VLDX_LANE(2);
+  DECL_ALL_VLDX_LANE(3);
+  DECL_ALL_VLDX_LANE(4);
+
+  /* Define dummy input arrays, large enough for x4 vectors.  */
+  DUMMY_ARRAY(buffer_src, int, 8, 8, 4);
+  DUMMY_ARRAY(buffer_src, int, 16, 4, 4);
+  DUMMY_ARRAY(buffer_src, int, 32, 2, 4);
+  DUMMY_ARRAY(buffer_src, uint, 8, 8, 4);
+  DUMMY_ARRAY(buffer_src, uint, 16, 4, 4);
+  DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
+  DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
+  DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+  DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
+  DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
+  DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
+  DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
+  DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+  DUMMY_ARRAY(buffer_src, float, 32, 2, 4);
+  DUMMY_ARRAY(buffer_src, float, 32, 4, 4);
+
+  /* Check vld2_lane/vld2q_lane.  */
+  clean_results ();
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+  TEST_ALL_VLDX_LANE(2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, " chunk 1");
+
+  /* Check vld3_lane/vld3q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+  TEST_ALL_VLDX_LANE(3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, " chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, " chunk 2");
+
+  /* Check vld4_lane/vld4q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+  TEST_ALL_VLDX_LANE(4);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, " chunk 1");
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, " chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, " chunk 3");
+}
+
+int main (void)
+{
+  exec_vldX_lane ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc
@@ -0,0 +1,100 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMLX_LANE(VAR)			\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, float, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4);		\
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+  /* vector_res = vmlx_lane(vector, vector2, vector3, lane),
+     then store the result.  */
+#define TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, L)			\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			   VECT_VAR(vector2, T1, W, N),			\
+			   VECT_VAR(vector3, T1, W, N2),		\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
+		    VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLX_LANE(INSN, Q, T1, T2, W, N, N2, V)	\
+  TEST_VMLX_LANE1(INSN, Q, T1, T2, W, N, N2, V)
+
+  DECL_VMLX_LANE(vector);
+  DECL_VMLX_LANE(vector2);
+  DECL_VMLX_LANE(vector_res);
+
+  DECL_VARIABLE(vector3, int, 16, 4);
+  DECL_VARIABLE(vector3, int, 32, 2);
+  DECL_VARIABLE(vector3, uint, 16, 4);
+  DECL_VARIABLE(vector3, uint, 32, 2);
+  DECL_VARIABLE(vector3, float, 32, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  VDUP(vector2, , int, s, 16, 4, 0x55);
+  VDUP(vector2, , int, s, 32, 2, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x55);
+  VDUP(vector2, , float, f, 32, 2, 55.3f);
+  VDUP(vector2, q, int, s, 16, 8, 0x55);
+  VDUP(vector2, q, int, s, 32, 4, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x55);
+  VDUP(vector2, q, uint, u, 32, 4, 0x55);
+  VDUP(vector2, q, float, f, 32, 4, 55.8f);
+
+  VDUP(vector3, , int, s, 16, 4, 0xBB);
+  VDUP(vector3, , int, s, 32, 2, 0xBB);
+  VDUP(vector3, , uint, u, 16, 4, 0xBB);
+  VDUP(vector3, , uint, u, 32, 2, 0xBB);
+  VDUP(vector3, , float, f, 32, 2, 11.34f);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VMLX_LANE(INSN_NAME, , int, s, 16, 4, 4, 2);
+  TEST_VMLX_LANE(INSN_NAME, , int, s, 32, 2, 2, 1);
+  TEST_VMLX_LANE(INSN_NAME, , uint, u, 16, 4, 4, 2);
+  TEST_VMLX_LANE(INSN_NAME, , uint, u, 32, 2, 2, 1);
+  TEST_VMLX_LANE(INSN_NAME, , float, f, 32, 2, 2, 1);
+  TEST_VMLX_LANE(INSN_NAME, q, int, s, 16, 8, 4, 3);
+  TEST_VMLX_LANE(INSN_NAME, q, int, s, 32, 4, 2, 1);
+  TEST_VMLX_LANE(INSN_NAME, q, uint, u, 16, 8, 4, 2);
+  TEST_VMLX_LANE(INSN_NAME, q, uint, u, 32, 4, 2, 1);
+  TEST_VMLX_LANE(INSN_NAME, q, float, f, 32, 4, 2, 1);
+
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
@@ -0,0 +1,692 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+
+/* vld2/chunk 0.  */
+VECT_VAR_DECL(expected_vld2_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld2_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld2_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7,
+					       0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					       0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						0xf4, 0xf5, 0xf6, 0xf7,
+						0xf8, 0xf9, 0xfa, 0xfb,
+						0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						0xf4, 0xf5, 0xf6, 0xf7,
+						0xf8, 0xf9, 0xfa, 0xfb,
+						0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+						  0xc1600000, 0xc1500000 };
+
+/* vld2/chunk 1.  */
+VECT_VAR_DECL(expected_vld2_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					      0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7,
+					       0x8, 0x9, 0xa, 0xb,
+					       0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+					       0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
+					       0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld2_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7,
+						0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld2_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+						0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
+						0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld2_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7,
+						0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+						0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+						  0xc1200000, 0xc1100000 };
+
+/* vld3/chunk 0.  */
+VECT_VAR_DECL(expected_vld3_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld3_0,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld3_0,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7,
+					       0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					       0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						0xf4, 0xf5, 0xf6, 0xf7,
+						0xf8, 0xf9, 0xfa, 0xfb,
+						0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						0xf4, 0xf5, 0xf6, 0xf7,
+						0xf8, 0xf9, 0xfa, 0xfb,
+						0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+						  0xc1600000, 0xc1500000 };
+
+/* vld3/chunk 1.  */
+VECT_VAR_DECL(expected_vld3_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					      0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7,
+					       0x8, 0x9, 0xa, 0xb,
+					       0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+					       0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
+					       0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld3_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7,
+						0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld3_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+						0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
+						0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld3_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7,
+						0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+						0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+						  0xc1200000, 0xc1100000 };
+
+/* vld3/chunk 2.  */
+VECT_VAR_DECL(expected_vld3_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					      0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,int,16,4) [] = { 0xfff8, 0xfff9,
+					       0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld3_2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,uint,16,4) [] = { 0xfff8, 0xfff9,
+						0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld3_2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
+						0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
+VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+					       0x14, 0x15, 0x16, 0x17,
+					       0x18, 0x19, 0x1a, 0x1b,
+					       0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
+					       0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld3_2,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+						0x14, 0x15, 0x16, 0x17,
+						0x18, 0x19, 0x1a, 0x1b,
+						0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld3_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
+						0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld3_2,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+						0x14, 0x15, 0x16, 0x17,
+						0x18, 0x19, 0x1a, 0x1b,
+						0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
+						  0xc0c00000, 0xc0a00000 };
+
+/* vld4/chunk 0.  */
+VECT_VAR_DECL(expected_vld4_0,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					      0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld4_0,int,16,4) [] = { 0xfff0, 0xfff1,
+					       0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld4_0,uint,16,4) [] = { 0xfff0, 0xfff1,
+						0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					       0xf4, 0xf5, 0xf6, 0xf7,
+					       0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					       0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					       0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						0xf4, 0xf5, 0xf6, 0xf7,
+						0xf8, 0xf9, 0xfa, 0xfb,
+						0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_0,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						0xf4, 0xf5, 0xf6, 0xf7,
+						0xf8, 0xf9, 0xfa, 0xfb,
+						0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+						  0xc1600000, 0xc1500000 };
+
+/* vld4/chunk 1.  */
+VECT_VAR_DECL(expected_vld4_1,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					      0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_1,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_1,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7,
+					       0x8, 0x9, 0xa, 0xb,
+					       0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+					       0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,int,32,4) [] = { 0xfffffff4, 0xfffffff5,
+					       0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_1,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,uint,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7,
+						0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_1,uint,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+						0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,uint,32,4) [] = { 0xfffffff4, 0xfffffff5,
+						0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_1,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7,
+						0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb,
+						0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000,
+						  0xc1200000, 0xc1100000 };
+
+/* vld4/chunk 2.  */
+VECT_VAR_DECL(expected_vld4_2,int,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					      0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,int,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,int,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld4_2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,uint,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff4, 0xfffffff5 };
+VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
+VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+					       0x14, 0x15, 0x16, 0x17,
+					       0x18, 0x19, 0x1a, 0x1b,
+					       0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					       0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,int,32,4) [] = { 0xfffffff8, 0xfffffff9,
+					       0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld4_2,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,uint,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+						0x14, 0x15, 0x16, 0x17,
+						0x18, 0x19, 0x1a, 0x1b,
+						0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld4_2,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,uint,32,4) [] = { 0xfffffff8, 0xfffffff9,
+						0xfffffffa, 0xfffffffb };
+VECT_VAR_DECL(expected_vld4_2,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
+						0x14, 0x15, 0x16, 0x17,
+						0x18, 0x19, 0x1a, 0x1b,
+						0x1c, 0x1d, 0x1e, 0x1f };
+VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+						0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000,
+						  0xc0c00000, 0xc0a00000 };
+
+/* vld4/chunk 3.  */
+VECT_VAR_DECL(expected_vld4_3,int,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
+					      0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,int,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,int,32,2) [] = { 0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_3,int,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
+					       0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,uint,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff6, 0xfffffff7 };
+VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
+					       0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
+VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
+					       0x24, 0x25, 0x26, 0x27,
+					       0x28, 0x29, 0x2a, 0x2b,
+					       0x2c, 0x2d, 0x2e, 0x2f };
+VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
+					       0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,int,32,4) [] = { 0xfffffffc, 0xfffffffd,
+					       0xfffffffe, 0xffffffff };
+VECT_VAR_DECL(expected_vld4_3,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,uint,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
+						0x24, 0x25, 0x26, 0x27,
+						0x28, 0x29, 0x2a, 0x2b,
+						0x2c, 0x2d, 0x2e, 0x2f };
+VECT_VAR_DECL(expected_vld4_3,uint,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,uint,32,4) [] = { 0xfffffffc, 0xfffffffd,
+						0xfffffffe, 0xffffffff };
+VECT_VAR_DECL(expected_vld4_3,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
+						0x24, 0x25, 0x26, 0x27,
+						0x28, 0x29, 0x2a, 0x2b,
+						0x2c, 0x2d, 0x2e, 0x2f };
+VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb,
+						0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000,
+						  0xc0000000, 0xbf800000 };
+
+void exec_vldX (void)
+{
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLDX(T1, W, N, X)						\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+#define TEST_VLDX(Q, T1, T2, W, N, X)					\
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
+    /* Use dedicated init buffer, of size X */				\
+    vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X));	\
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
+		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y)			\
+  memcpy(VECT_VAR(result, T1, W, N),			\
+	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* We need all variants in 64 bits, but there is no 64x2 variant.  */
+#define DECL_ALL_VLDX(X)			\
+  DECL_VLDX(int, 8, 8, X);			\
+  DECL_VLDX(int, 16, 4, X);			\
+  DECL_VLDX(int, 32, 2, X);			\
+  DECL_VLDX(int, 64, 1, X);			\
+  DECL_VLDX(uint, 8, 8, X);			\
+  DECL_VLDX(uint, 16, 4, X);			\
+  DECL_VLDX(uint, 32, 2, X);			\
+  DECL_VLDX(uint, 64, 1, X);			\
+  DECL_VLDX(poly, 8, 8, X);			\
+  DECL_VLDX(poly, 16, 4, X);			\
+  DECL_VLDX(float, 32, 2, X);			\
+  DECL_VLDX(int, 8, 16, X);			\
+  DECL_VLDX(int, 16, 8, X);			\
+  DECL_VLDX(int, 32, 4, X);			\
+  DECL_VLDX(uint, 8, 16, X);			\
+  DECL_VLDX(uint, 16, 8, X);			\
+  DECL_VLDX(uint, 32, 4, X);			\
+  DECL_VLDX(poly, 8, 16, X);			\
+  DECL_VLDX(poly, 16, 8, X);			\
+  DECL_VLDX(float, 32, 4, X)
+
+#define TEST_ALL_VLDX(X)			\
+  TEST_VLDX(, int, s, 8, 8, X);			\
+  TEST_VLDX(, int, s, 16, 4, X);		\
+  TEST_VLDX(, int, s, 32, 2, X);		\
+  TEST_VLDX(, int, s, 64, 1, X);		\
+  TEST_VLDX(, uint, u, 8, 8, X);		\
+  TEST_VLDX(, uint, u, 16, 4, X);		\
+  TEST_VLDX(, uint, u, 32, 2, X);		\
+  TEST_VLDX(, uint, u, 64, 1, X);		\
+  TEST_VLDX(, poly, p, 8, 8, X);		\
+  TEST_VLDX(, poly, p, 16, 4, X);		\
+  TEST_VLDX(, float, f, 32, 2, X);		\
+  TEST_VLDX(q, int, s, 8, 16, X);		\
+  TEST_VLDX(q, int, s, 16, 8, X);		\
+  TEST_VLDX(q, int, s, 32, 4, X);		\
+  TEST_VLDX(q, uint, u, 8, 16, X);		\
+  TEST_VLDX(q, uint, u, 16, 8, X);		\
+  TEST_VLDX(q, uint, u, 32, 4, X);		\
+  TEST_VLDX(q, poly, p, 8, 16, X);		\
+  TEST_VLDX(q, poly, p, 16, 8, X);		\
+  TEST_VLDX(q, float, f, 32, 4, X)
+
+#define TEST_ALL_EXTRA_CHUNKS(X, Y)		\
+  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 64, 1, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 8, 16, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 8, 16, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 8, 16, X, Y);		\
+  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
+  TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+
+  DECL_ALL_VLDX(2);
+  DECL_ALL_VLDX(3);
+  DECL_ALL_VLDX(4);
+
+  /* Special input buffers of suitable size are needed for vld2/vld3/vld4.  */
+  /* Input buffers for vld2, 1 of each size */
+  VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8);
+  PAD(buffer_vld2_pad, int, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4);
+  PAD(buffer_vld2_pad, int, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2);
+  PAD(buffer_vld2_pad, int, 32, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1);
+  PAD(buffer_vld2_pad, int, 64, 1);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8);
+  PAD(buffer_vld2_pad, uint, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4);
+  PAD(buffer_vld2_pad, uint, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2);
+  PAD(buffer_vld2_pad, uint, 32, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1);
+  PAD(buffer_vld2_pad, uint, 64, 1);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8);
+  PAD(buffer_vld2_pad, poly, 8, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
+  PAD(buffer_vld2_pad, poly, 16, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
+  PAD(buffer_vld2_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16);
+  PAD(buffer_vld2_pad, int, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8);
+  PAD(buffer_vld2_pad, int, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4);
+  PAD(buffer_vld2_pad, int, 32, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2);
+  PAD(buffer_vld2_pad, int, 64, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16);
+  PAD(buffer_vld2_pad, uint, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8);
+  PAD(buffer_vld2_pad, uint, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4);
+  PAD(buffer_vld2_pad, uint, 32, 4);
+  VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2);
+  PAD(buffer_vld2_pad, uint, 64, 2);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16);
+  PAD(buffer_vld2_pad, poly, 8, 16);
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
+  PAD(buffer_vld2_pad, poly, 16, 8);
+  VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
+  PAD(buffer_vld2_pad, float, 32, 4);
+
+  /* Input buffers for vld3, 1 of each size */
+  VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8);
+  PAD(buffer_vld3_pad, int, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4);
+  PAD(buffer_vld3_pad, int, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2);
+  PAD(buffer_vld3_pad, int, 32, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1);
+  PAD(buffer_vld3_pad, int, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8);
+  PAD(buffer_vld3_pad, uint, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4);
+  PAD(buffer_vld3_pad, uint, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2);
+  PAD(buffer_vld3_pad, uint, 32, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1);
+  PAD(buffer_vld3_pad, uint, 64, 1);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8);
+  PAD(buffer_vld3_pad, poly, 8, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
+  PAD(buffer_vld3_pad, poly, 16, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
+  PAD(buffer_vld3_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16);
+  PAD(buffer_vld3_pad, int, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8);
+  PAD(buffer_vld3_pad, int, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4);
+  PAD(buffer_vld3_pad, int, 32, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2);
+  PAD(buffer_vld3_pad, int, 64, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16);
+  PAD(buffer_vld3_pad, uint, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8);
+  PAD(buffer_vld3_pad, uint, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4);
+  PAD(buffer_vld3_pad, uint, 32, 4);
+  VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2);
+  PAD(buffer_vld3_pad, uint, 64, 2);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16);
+  PAD(buffer_vld3_pad, poly, 8, 16);
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
+  PAD(buffer_vld3_pad, poly, 16, 8);
+  VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
+  PAD(buffer_vld3_pad, float, 32, 4);
+
+  /* Input buffers for vld4, 1 of each size */
+  VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8);
+  PAD(buffer_vld4_pad, int, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4);
+  PAD(buffer_vld4_pad, int, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2);
+  PAD(buffer_vld4_pad, int, 32, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1);
+  PAD(buffer_vld4_pad, int, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8);
+  PAD(buffer_vld4_pad, uint, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4);
+  PAD(buffer_vld4_pad, uint, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2);
+  PAD(buffer_vld4_pad, uint, 32, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1);
+  PAD(buffer_vld4_pad, uint, 64, 1);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8);
+  PAD(buffer_vld4_pad, poly, 8, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
+  PAD(buffer_vld4_pad, poly, 16, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
+  PAD(buffer_vld4_pad, float, 32, 2);
+
+  VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16);
+  PAD(buffer_vld4_pad, int, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8);
+  PAD(buffer_vld4_pad, int, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4);
+  PAD(buffer_vld4_pad, int, 32, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2);
+  PAD(buffer_vld4_pad, int, 64, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16);
+  PAD(buffer_vld4_pad, uint, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8);
+  PAD(buffer_vld4_pad, uint, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4);
+  PAD(buffer_vld4_pad, uint, 32, 4);
+  VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2);
+  PAD(buffer_vld4_pad, uint, 64, 2);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16);
+  PAD(buffer_vld4_pad, poly, 8, 16);
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
+  PAD(buffer_vld4_pad, poly, 16, 8);
+  VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
+  PAD(buffer_vld4_pad, float, 32, 4);
+
+  /* Check vld2/vld2q.  */
+  clean_results ();
+#define TEST_MSG "VLD2/VLD2Q"
+  TEST_ALL_VLDX(2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld2_1, "chunk 1");
+
+  /* Check vld3/vld3q.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+  TEST_ALL_VLDX(3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_1, "chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld3_2, "chunk 2");
+
+  /* Check vld4/vld4q.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+  TEST_ALL_VLDX(4);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_0, "chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_1, "chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_2, "chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_vld4_3, "chunk 3");
+}
+
+int main (void)
+{
+  exec_vldX ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c
@@ -0,0 +1,81 @@
+#define INSN_NAME vadd
+#define TEST_MSG "VADD/VADDQ"
+
+/* Extra tests for functions requiring floating-point types.  */
+void exec_vadd_f32(void);
+#define EXTRA_TESTS exec_vadd_f32
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf4, 0xf5,
+				       0xf6, 0xf7, 0xf8, 0xf9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffec, 0xffed, 0xffee, 0xffef };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff4 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x54 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x5, 0x6, 0x7,
+					0x8, 0x9, 0xa, 0xb };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xe, 0xf, 0x10, 0x11 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x18, 0x19 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9,
+					0xea, 0xeb, 0xec, 0xed,
+					0xee, 0xef, 0xf0, 0xf1,
+					0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffdc, 0xffdd, 0xffde, 0xffdf,
+					0xffe0, 0xffe1, 0xffe2, 0xffe3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffd2, 0xffffffd3,
+					0xffffffd4, 0xffffffd5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x8, 0x9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+					 0x0, 0x1, 0x2, 0x3,
+					 0x4, 0x5, 0x6, 0x7,
+					 0x8, 0x9, 0xa, 0xb };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff4, 0xfff5, 0xfff6,
+					 0xfff7, 0xfff8, 0xfff9, 0xfffa };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x27, 0x28, 0x29, 0x2a };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3,
+					 0xfffffffffffffff4 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40d9999a, 0x40d9999a };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x41100000, 0x41100000,
+						   0x41100000, 0x41100000 };
+
+void exec_vadd_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  VDUP(vector2, , float, f, 32, 2, 4.5f);
+  VDUP(vector2, q, float, f, 32, 4, 5.6f);
+
+  TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
@@ -0,0 +1,180 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0,
+					 0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					  0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					  0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0xfffffffffffffff0,
+					  0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0,
+					  0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					  0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
+					    0xc1800000, 0xc1800000 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1,
+					 0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0xfffffffffffffff1,
+					 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					  0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					  0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0xfffffffffffffff1,
+					  0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1,
+					  0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					  0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
+					    0xc1700000, 0xc1700000 };
+
+/* Chunk 2.  */
+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2,
+					 0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+					 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0xfffffff2,
+					 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff2,
+					 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+					  0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0xfffffff2,
+					  0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff2,
+					  0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2,
+					  0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
+					  0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
+					    0xc1600000, 0xc1600000 };
+
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+void exec_vld1_dup (void)
+{
+  int i;
+
+  /* Fill vector with buffer item #i.  */
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N)			\
+  VECT_VAR(VAR, T1, W, N) =						\
+    vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]);			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i<3; i++) {
+    clean_results ();
+
+    TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup);
+
+    TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2);
+    TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4);
+
+    switch (i) {
+    case 0:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      break;
+    case 1:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      break;
+    case 2:
+      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+}
+
+int main (void)
+{
+  exec_vld1_dup ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c
@@ -0,0 +1,82 @@
+#define INSN_NAME vsub
+#define TEST_MSG "VSUB/VSUBQ"
+
+/* Extra tests for functions requiring floating-point types */
+void exec_vsub_f32(void);
+#define EXTRA_TESTS exec_vsub_f32
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xee, 0xef, 0xf0, 0xf1,
+				       0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffed, 0xffffffee };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff8c };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xdc, 0xdd, 0xde, 0xdf,
+					0xe0, 0xe1, 0xe2, 0xe3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffd2, 0xffd3, 0xffd4, 0xffd5 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffc8, 0xffffffc9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffffee };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xfa, 0xfb, 0xfc, 0xfd,
+					0xfe, 0xff, 0x0, 0x1,
+					0x2, 0x3, 0x4, 0x5,
+					0x6, 0x7, 0x8, 0x9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x5, 0x6, 0x7,
+					0x8, 0x9, 0xa, 0xb };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xe, 0xf, 0x10, 0x11 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffd8,
+					0xffffffffffffffd9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe4, 0xe5, 0xe6, 0xe7,
+					 0xe8, 0xe9, 0xea, 0xeb,
+					 0xec, 0xed, 0xee, 0xef,
+					 0xf0, 0xf1, 0xf2, 0xf3};
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffed, 0xffee, 0xffef, 0xfff0,
+					 0xfff1, 0xfff2, 0xfff3, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffb9, 0xffffffba,
+					 0xffffffbb, 0xffffffbc };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed,
+					 0xffffffffffffffee };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					  0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc00ccccd, 0xc00ccccd };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc00ccccc, 0xc00ccccc,
+						   0xc00ccccc, 0xc00ccccc };
+
+void exec_vsub_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  VDUP(vector2, , float, f, 32, 2, 4.5f);
+  VDUP(vector2, q, float, f, 32, 4, 5.6f);
+
+  TEST_BINARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c
@@ -0,0 +1,121 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+					0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff,
+					0xffffffff, 0xffffffff };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
+					0x7fff, 0x7fff, 0x7fff, 0x7fff };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+					0x7fffffff, 0x7fffffff };
+
+#define INSN_NAME vqdmulh
+#define TEST_MSG "VQDMULH_LANE"
+#define FNNAME1(NAME) exec_ ## NAME ## _lane
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result.  */
+#define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			   VECT_VAR(vector2, T1, W, N2),		\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N));			\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQDMULH_LANE1(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMULH_LANE(Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULH_LANE1(INSN_NAME, Q, T1, T2, W, N, N2, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  /* vector2: vqdmulh_lane and vqdmulhq_lane have a 2nd argument with
+     the same number of elements, so we need only one variable of each
+     type.  */
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x55);
+  VDUP(vector2, , int, s, 32, 2, 0xBB);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 2, expected_cumulative_sat, "");
+  TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat, "");
+  TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 3, expected_cumulative_sat, "");
+  TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 0, expected_cumulative_sat, "");
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, "");
+
+  /* Choose input values to trigger saturation.  */
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+
+#define TEST_MSG2 " (check mul cumulative saturation)"
+  TEST_VQDMULH_LANE(, int, s, 16, 4, 4, 3, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH_LANE(, int, s, 32, 2, 2, 1, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH_LANE(q, int, s, 16, 8, 4, 2, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH_LANE(q, int, s, 32, 4, 2, 1, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc
@@ -0,0 +1,134 @@
+/* Can't use the standard binary_op.inc template because vmax has no
+   64 bits variant.  */
+
+#include <math.h>
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* Basic test: y=vmax(x,x), then store the result.  */
+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =                                      \
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),                       \
+                      VECT_VAR(vector2, T1, W, N));                     \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N)   \
+  TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)        \
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+#ifdef HAS_FLOAT_VARIANT
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+#endif
+
+  /* Choose init value arbitrarily, will be used as comparison value.  */
+  VDUP(vector2, , int, s, 8, 8, -13);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -16);
+  VDUP(vector2, , uint, u, 8, 8, 0xf3);
+  VDUP(vector2, , uint, u, 16, 4, 0xfff1);
+  VDUP(vector2, , uint, u, 32, 2, 0xfffffff0);
+  VDUP(vector2, q, int, s, 8, 16, -12);
+  VDUP(vector2, q, int, s, 16, 8, -13);
+  VDUP(vector2, q, int, s, 32, 4, -15);
+  VDUP(vector2, q, uint, u, 8, 16, 0xf9);
+  VDUP(vector2, q, uint, u, 16, 8, 0xfff2);
+  VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1);
+#ifdef HAS_FLOAT_VARIANT
+  VDUP(vector2, , float, f, 32, 2, -15.5f);
+  VDUP(vector2, q, float, f, 32, 4, -14.5f);
+#endif
+
+#ifdef HAS_FLOAT_VARIANT
+#define FLOAT_VARIANT(MACRO, VAR)			\
+  MACRO(VAR, , float, f, 32, 2);			\
+  MACRO(VAR, q, float, f, 32, 4)
+#else
+#define FLOAT_VARIANT(MACRO, VAR)
+#endif
+
+#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR)	\
+  MACRO(VAR, , int, s, 8, 8);				\
+  MACRO(VAR, , int, s, 16, 4);				\
+  MACRO(VAR, , int, s, 32, 2);				\
+  MACRO(VAR, , uint, u, 8, 8);				\
+  MACRO(VAR, , uint, u, 16, 4);				\
+  MACRO(VAR, , uint, u, 32, 2);				\
+  MACRO(VAR, q, int, s, 8, 16);				\
+  MACRO(VAR, q, int, s, 16, 8);				\
+  MACRO(VAR, q, int, s, 32, 4);				\
+  MACRO(VAR, q, uint, u, 8, 16);			\
+  MACRO(VAR, q, uint, u, 16, 8);			\
+  MACRO(VAR, q, uint, u, 32, 4);			\
+  FLOAT_VARIANT(MACRO, VAR)
+
+  /* Apply a binary operator named INSN_NAME.  */
+  TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+
+#ifdef HAS_FLOAT_VARIANT
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
+
+  /* Extra FP tests with special values (NaN, ....)  */
+  VDUP(vector, q, float, f, 32, 4, 1.0f);
+  VDUP(vector2, q, float, f, 32, 4, NAN);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)");
+
+  VDUP(vector, q, float, f, 32, 4, -NAN);
+  VDUP(vector2, q, float, f, 32, 4, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)");
+
+  VDUP(vector, q, float, f, 32, 4, 1.0f);
+  VDUP(vector2, q, float, f, 32, 4, HUGE_VALF);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)");
+
+  VDUP(vector, q, float, f, 32, 4, -HUGE_VALF);
+  VDUP(vector2, q, float, f, 32, 4, 1.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)");
+
+  VDUP(vector, q, float, f, 32, 4, 0.0f);
+  VDUP(vector2, q, float, f, 32, 4, -0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)");
+
+  VDUP(vector, q, float, f, 32, 4, -0.0f);
+  VDUP(vector2, q, float, f, 32, 4, 0.0f);
+  TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero2, " FP special (-0.0)");
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c
@@ -0,0 +1,127 @@
+#define INSN_NAME vqneg
+#define TEST_MSG "VQNEG/VQNEGQ"
+
+/* Extra tests for functions requiring corner cases tests */
+void vqneg_extra(void);
+#define EXTRA_TESTS vqneg_extra
+
+#include "unary_sat_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd,
+					0xc, 0xb, 0xa, 0x9,
+					0x8, 0x7, 0x6, 0x5,
+					0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+					0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,8,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,8,16) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results when input is the min negative value of the type.  */
+VECT_VAR_DECL(expected_min_neg,int,8,8) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+					       0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,4) [] = { 0x7fff, 0x7fff,
+						0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected_min_neg,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
+						0x7f, 0x7f, 0x7f, 0x7f,
+						0x7f, 0x7f, 0x7f, 0x7f,
+						0x7f, 0x7f, 0x7f, 0x7f };
+VECT_VAR_DECL(expected_min_neg,int,16,8) [] = { 0x7fff, 0x7fff,
+						0x7fff, 0x7fff,
+						0x7fff, 0x7fff,
+						0x7fff, 0x7fff };
+VECT_VAR_DECL(expected_min_neg,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+						0x7fffffff, 0x7fffffff };
+
+/* Expected values of cumulative_saturation flag when input is the min
+   negative value of the type.  */
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,8,16) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat_min_neg,int,32,4) = 1;
+
+void vqneg_extra()
+{
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" with min negative values to check
+     saturation.  */
+  VDUP(vector, , int, s, 8, 8, 0x80);
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 8, 16, 0x80);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+
+#define MSG "min negative input"
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat_min_neg, MSG);
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG);
+  CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG);
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc
@@ -0,0 +1,91 @@
+/* Template file for saturating binary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other
+   saturating binary operators, these can be tested by providing a
+   definition for EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = OP(vector1,vector2), then store the result.  */
+
+#define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),			\
+		      VECT_VAR(vector_res, T1, W, N));			\
+      CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_BINARY_SAT_OP(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+
+  /* Choose arbitrary initialization values.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 8, 8, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x66);
+  VDUP(vector2, , uint, u, 32, 2, 0x77);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+
+  VDUP(vector2, q, int, s, 8, 16, 0x11);
+  VDUP(vector2, q, int, s, 16, 8, 0x22);
+  VDUP(vector2, q, int, s, 32, 4, 0x33);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 8, 16, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x66);
+  VDUP(vector2, q, uint, u, 32, 4, 0x77);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+
+  /* Apply a saturating binary operator named INSN_NAME.  */
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , int, s, 64, 1, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 8, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 16, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 32, 2, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, , uint, u, 64, 1, expected_cumulative_sat, "");
+
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, int, s, 64, 2, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 8, 16, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 16, 8, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 32, 4, expected_cumulative_sat, "");
+  TEST_BINARY_SAT_OP(INSN_NAME, q, uint, u, 64, 2, expected_cumulative_sat, "");
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c
@@ -0,0 +1,94 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x8000, 0x8000, 0x8000, 0x8000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x4000, 0x4000 };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+					 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fffffffffffffff,
+					 0x7fffffffffffffff };
+
+#define INSN_NAME vqdmull
+#define TEST_MSG "VQDMULL_LANE"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result.  */
+#define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N));		\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			VECT_VAR(vector2, T1, W, N),			\
+			L);						\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N),				\
+		 VECT_VAR(vector_res, T1, W2, N));			\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQDMULL_LANE1(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMULL_LANE(T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULL_LANE1(INSN_NAME, T1, T2, W, W2, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  clean_results ();
+
+  /* Initialize vector.  */
+  VDUP(vector, , int, s, 16, 4, 0x1000);
+  VDUP(vector, , int, s, 32, 2, 0x1000);
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x4);
+  VDUP(vector2, , int, s, 32, 2, 0x2);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat, "");
+  TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+
+#define TEST_MSG2 "with saturation"
+  TEST_VQDMULL_LANE(int, s, 16, 32, 4, 2, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULL_LANE(int, s, 32, 64, 2, 1, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c
@@ -0,0 +1,156 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x1, 0x12, 0x23,
+				       0x34, 0x45, 0x56, 0x67 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfde0, 0xfe02, 0xfe24, 0xfe46 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffcd0, 0xfffffd03 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc0, 0x4, 0x48, 0x8c,
+					0xd0, 0x14, 0x58, 0x9c };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc,
+					0xd0, 0x94, 0x58, 0x1c };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5,
+					0x6c, 0xe3, 0x5a, 0xd1,
+					0x48, 0xbf, 0x36, 0xad,
+					0x24, 0x9b, 0x12, 0x89 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xf780, 0xf808, 0xf890, 0xf918,
+					0xf9a0, 0xfa28, 0xfab0, 0xfb38 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffff670, 0xfffff709,
+					0xfffff7a2, 0xfffff83b };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0xa, 0xb4, 0x5e,
+					 0x8, 0xb2, 0x5c, 0x6,
+					 0xb0, 0x5a, 0x4, 0xae,
+					 0x58, 0x2, 0xac, 0x56 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf450, 0xf50b, 0xf5c6, 0xf681,
+					 0xf73c, 0xf7f7, 0xf8b2, 0xf96d };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff340, 0xfffff40c,
+					 0xfffff4d8, 0xfffff5a4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e,
+					 0xc8, 0x62, 0x9c, 0x36,
+					 0x30, 0x9a, 0x64, 0xce,
+					 0x98, 0x32, 0xcc, 0x66 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000,
+					   0xc4ae4ccd, 0xc4a1d999 };
+
+#ifndef INSN_NAME
+#define INSN_NAME vmul
+#define TEST_MSG "VMUL"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMUL(T, W, N)			\
+  DECL_VARIABLE(vector1, T, W, N);		\
+  DECL_VARIABLE(vector2, T, W, N);		\
+  DECL_VARIABLE(vector_res, T, W, N)
+
+  /* vector_res = OP(vector1, vector2), then store the result.  */
+#define TEST_VMUL1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMUL(INSN, Q, T1, T2, W, N)	\
+  TEST_VMUL1(INSN, Q, T1, T2, W, N)
+
+  DECL_VMUL(int, 8, 8);
+  DECL_VMUL(int, 16, 4);
+  DECL_VMUL(int, 32, 2);
+  DECL_VMUL(uint, 8, 8);
+  DECL_VMUL(uint, 16, 4);
+  DECL_VMUL(uint, 32, 2);
+  DECL_VMUL(poly, 8, 8);
+  DECL_VMUL(float, 32, 2);
+  DECL_VMUL(int, 8, 16);
+  DECL_VMUL(int, 16, 8);
+  DECL_VMUL(int, 32, 4);
+  DECL_VMUL(uint, 8, 16);
+  DECL_VMUL(uint, 16, 8);
+  DECL_VMUL(uint, 32, 4);
+  DECL_VMUL(poly, 8, 16);
+  DECL_VMUL(float, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , poly, p, 8, 8);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, poly, p, 8, 16);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x44);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x66);
+  VDUP(vector2, , poly, p, 8, 8, 0x44);
+  VDUP(vector2, , float, f, 32, 2, 33.3f);
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, poly, p, 8, 16, 0xAA);
+  VDUP(vector2, q, float, f, 32, 4, 99.6f);
+
+  /* Execute the tests.  */
+  TEST_VMUL(INSN_NAME, , int, s, 8, 8);
+  TEST_VMUL(INSN_NAME, , int, s, 16, 4);
+  TEST_VMUL(INSN_NAME, , int, s, 32, 2);
+  TEST_VMUL(INSN_NAME, , uint, u, 8, 8);
+  TEST_VMUL(INSN_NAME, , uint, u, 16, 4);
+  TEST_VMUL(INSN_NAME, , uint, u, 32, 2);
+  TEST_VMUL(INSN_NAME, , poly, p, 8, 8);
+  TEST_VMUL(INSN_NAME, , float, f, 32, 2);
+  TEST_VMUL(INSN_NAME, q, int, s, 8, 16);
+  TEST_VMUL(INSN_NAME, q, int, s, 16, 8);
+  TEST_VMUL(INSN_NAME, q, int, s, 32, 4);
+  TEST_VMUL(INSN_NAME, q, uint, u, 8, 16);
+  TEST_VMUL(INSN_NAME, q, uint, u, 16, 8);
+  TEST_VMUL(INSN_NAME, q, uint, u, 32, 4);
+  TEST_VMUL(INSN_NAME, q, poly, p, 8, 16);
+  TEST_VMUL(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vraddhn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vraddhn.c
@@ -0,0 +1,24 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+#define INSN_NAME vraddhn
+#define TEST_MSG "VRADDHN"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x19, 0x19 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x4, 0x4, 0x4, 0x4,
+					0x4, 0x4, 0x4, 0x4 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x38, 0x38, 0x38, 0x38 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x4, 0x4 };
+
+#include "vXXXhn.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c
@@ -0,0 +1,103 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results splitted in several chunks.  */
+/* Chunk 0.  */
+VECT_VAR_DECL(expected0,int,8,8) [] = { 0xf0, 0xf4, 0x11, 0x11,
+					0xf1, 0xf5, 0x11, 0x11 };
+VECT_VAR_DECL(expected0,int,16,4) [] = { 0xfff0, 0xfff2,
+					 0x22, 0x22 };
+VECT_VAR_DECL(expected0,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
+					 0xf1, 0xf5, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,uint,16,4) [] = { 0xfff0, 0xfff2,
+					  0x66, 0x66 };
+VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55,
+					 0xf1, 0xf5, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2,
+					  0x66, 0x66 };
+VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11,
+					 0xf1, 0xf9, 0x11, 0x11,
+					 0xf2, 0xfa, 0x11, 0x11,
+					 0xf3, 0xfb, 0x11, 0x11 };
+VECT_VAR_DECL(expected0,int,16,8) [] = { 0xfff0, 0xfff4, 0x22, 0x22,
+					 0xfff1, 0xfff5, 0x22, 0x22 };
+VECT_VAR_DECL(expected0,int,32,4) [] = { 0xfffffff0, 0xfffffff2,
+					 0x33, 0x33 };
+VECT_VAR_DECL(expected0,int,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected0,uint,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
+					  0xf1, 0xf9, 0x55, 0x55,
+					  0xf2, 0xfa, 0x55, 0x55,
+					  0xf3, 0xfb, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,uint,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
+					  0xfff1, 0xfff5, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,uint,32,4) [] = { 0xfffffff0, 0xfffffff2,
+					  0x77, 0x77 };
+VECT_VAR_DECL(expected0,uint,64,2) [] = { 0x3333333333333333,
+					  0x3333333333333333 };
+VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55,
+					  0xf1, 0xf9, 0x55, 0x55,
+					  0xf2, 0xfa, 0x55, 0x55,
+					  0xf3, 0xfb, 0x55, 0x55 };
+VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66,
+					  0xfff1, 0xfff5, 0x66, 0x66 };
+VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000,
+					    0x42073333, 0x42073333 };
+
+/* Chunk 1.  */
+VECT_VAR_DECL(expected1,int,8,8) [] = { 0xf2, 0xf6, 0x11, 0x11,
+					0xf3, 0xf7, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,4) [] = { 0xfff1, 0xfff3,
+					 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,2) [] = { 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
+					 0xf3, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,4) [] = { 0xfff1, 0xfff3,
+					  0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55,
+					 0xf3, 0xf7, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3,
+					  0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 };
+VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11,
+					 0xf5, 0xfd, 0x11, 0x11,
+					 0xf6, 0xfe, 0x11, 0x11,
+					 0xf7, 0xff, 0x11, 0x11 };
+VECT_VAR_DECL(expected1,int,16,8) [] = { 0xfff2, 0xfff6, 0x22, 0x22,
+					 0xfff3, 0xfff7, 0x22, 0x22 };
+VECT_VAR_DECL(expected1,int,32,4) [] = { 0xfffffff1, 0xfffffff3,
+					 0x33, 0x33 };
+VECT_VAR_DECL(expected1,int,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected1,uint,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
+					  0xf5, 0xfd, 0x55, 0x55,
+					  0xf6, 0xfe, 0x55, 0x55,
+					  0xf7, 0xff, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,uint,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
+					  0xfff3, 0xfff7, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,uint,32,4) [] = { 0xfffffff1, 0xfffffff3,
+					  0x77, 0x77 };
+VECT_VAR_DECL(expected1,uint,64,2) [] = { 0x3333333333333333,
+					  0x3333333333333333 };
+VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55,
+					  0xf5, 0xfd, 0x55, 0x55,
+					  0xf6, 0xfe, 0x55, 0x55,
+					  0xf7, 0xff, 0x55, 0x55 };
+VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66,
+					  0xfff3, 0xfff7, 0x66, 0x66 };
+VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000,
+					    0x42073333, 0x42073333 };
+
+#define INSN_NAME vzip
+#define TEST_MSG "VZIP/VZIPQ"
+
+#include "vshuffle.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
@@ -0,0 +1,123 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
+				       0x78, 0x56, 0x34, 0x12 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x9abcdef0, 0x12345678 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x123456789abcdef0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
+					0x78, 0x56, 0x34, 0x12 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x9abcdef0, 0x12345678 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
+					0x78, 0x56, 0x34, 0x12 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#define INSN_NAME vcreate
+#define TEST_MSG "VCREATE"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vcreate(x), then store the result.  */
+#define TEST_VCREATE(T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) = vcreate_##T2##W(VECT_VAR(val, T1, W, N)); \
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VAL(VAR, T1, W, N)			\
+  uint64_t VECT_VAR(VAR, T1, W, N)
+
+  DECL_VAL(val, int, 8, 8);
+  DECL_VAL(val, int, 16, 4);
+  DECL_VAL(val, int, 32, 2);
+  DECL_VAL(val, int, 64, 1);
+  DECL_VAL(val, float, 32, 2);
+  DECL_VAL(val, uint, 8, 8);
+  DECL_VAL(val, uint, 16, 4);
+  DECL_VAL(val, uint, 32, 2);
+  DECL_VAL(val, uint, 64, 1);
+  DECL_VAL(val, poly, 8, 8);
+  DECL_VAL(val, poly, 16, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 1);
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 64, 1);
+  DECL_VARIABLE(vector_res, poly, 8, 8);
+  DECL_VARIABLE(vector_res, poly, 16, 4);
+
+  clean_results ();
+
+  /* Initialize input values arbitrarily.  */
+  VECT_VAR(val, int, 8, 8) = 0x123456789abcdef0LL;
+  VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL;
+  VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL;
+  VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL;
+  VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL;
+  VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL;
+  VECT_VAR(val, uint, 16, 4) = 0x123456789abcdef0ULL;
+  VECT_VAR(val, uint, 32, 2) = 0x123456789abcdef0ULL;
+  VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL;
+  VECT_VAR(val, poly, 8, 8) = 0x123456789abcdef0ULL;
+  VECT_VAR(val, poly, 16, 4) = 0x123456789abcdef0ULL;
+
+  TEST_VCREATE(int, s, 8, 8);
+  TEST_VCREATE(int, s, 16, 4);
+  TEST_VCREATE(int, s, 32, 2);
+  TEST_VCREATE(float, f, 32, 2);
+  TEST_VCREATE(int, s, 64, 1);
+  TEST_VCREATE(uint, u, 8, 8);
+  TEST_VCREATE(uint, u, 16, 4);
+  TEST_VCREATE(uint, u, 32, 2);
+  TEST_VCREATE(uint, u, 64, 1);
+  TEST_VCREATE(poly, p, 8, 8);
+  TEST_VCREATE(poly, p, 16, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vcreate ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc
@@ -0,0 +1,75 @@
+/* Template file for the validation of comparison operator with
+   floating-point support.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other
+   operators, these can be tested by providing a definition for
+   EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Additional expected results declaration, they are initialized in
+   each test file.  */
+extern ARRAY(expected2, uint, 32, 2);
+extern ARRAY(expected2, uint, 32, 4);
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vcomp(x1,x2), then store the result.  */
+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)				\
+  VECT_VAR(vector_res, T3, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N))
+
+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N)				\
+  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+  DECL_VARIABLE(vector2, float, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily, will be used for vector
+     comparison.  */
+  VDUP(vector2, , float, f, 32, 2, -16.0f);
+  VDUP(vector2, q, float, f, 32, 4, -14.0f);
+
+  /* Apply operator named INSN_NAME.  */
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+
+  TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+
+  /* Test again, with different input values.  */
+  VDUP(vector2, , float, f, 32, 2, -10.0f);
+  VDUP(vector2, q, float, f, 32, 4, 10.0f);
+
+  TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2);
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected2, "");
+
+  TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4);
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2,"");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc
@@ -0,0 +1,87 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMLX_N(VAR)			\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, float, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, float, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 4)
+
+  /* vector_res = vmlx_n(vector, vector2, val),
+     then store the result.  */
+#define TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V)          \
+  VECT_VAR(vector_res, T1, W, N) =                      \
+    INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),     \
+			VECT_VAR(vector2, T1, W, N),    \
+			V);                             \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),         \
+		    VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLX_N(INSN, Q, T1, T2, W, N, V)	\
+  TEST_VMLX_N1(INSN, Q, T1, T2, W, N, V)
+
+  DECL_VMLX_N(vector);
+  DECL_VMLX_N(vector2);
+  DECL_VMLX_N(vector_res);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  VDUP(vector2, , int, s, 16, 4, 0x55);
+  VDUP(vector2, , int, s, 32, 2, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x55);
+  VDUP(vector2, , float, f, 32, 2, 55.2f);
+  VDUP(vector2, q, int, s, 16, 8, 0x55);
+  VDUP(vector2, q, int, s, 32, 4, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x55);
+  VDUP(vector2, q, uint, u, 32, 4, 0x55);
+  VDUP(vector2, q, float, f, 32, 4, 55.9f);
+
+  /* Choose multiplier arbitrarily.  */
+  TEST_VMLX_N(INSN_NAME, , int, s, 16, 4, 0x11);
+  TEST_VMLX_N(INSN_NAME, , int, s, 32, 2, 0x22);
+  TEST_VMLX_N(INSN_NAME, , uint, u, 16, 4, 0x33);
+  TEST_VMLX_N(INSN_NAME, , uint, u, 32, 2, 0x44);
+  TEST_VMLX_N(INSN_NAME, , float, f, 32, 2, 22.3f);
+  TEST_VMLX_N(INSN_NAME, q, int, s, 16, 8, 0x55);
+  TEST_VMLX_N(INSN_NAME, q, int, s, 32, 4, 0x66);
+  TEST_VMLX_N(INSN_NAME, q, uint, u, 16, 8, 0x77);
+  TEST_VMLX_N(INSN_NAME, q, uint, u, 32, 4, 0x88);
+  TEST_VMLX_N(INSN_NAME, q, float, f, 32, 4, 66.7f);
+
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc
@@ -0,0 +1,55 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: vec64=vXXXhn(vec128_a, vec128_b), then store the result.  */
+#define TEST_VXXXHN1(INSN, T1, T2, W, W2, N)				\
+  VECT_VAR(vector64, T1, W2, N) = INSN##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+						 VECT_VAR(vector2, T1, W, N)); \
+  vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N))
+
+#define TEST_VXXXHN(INSN, T1, T2, W, W2, N)	\
+  TEST_VXXXHN1(INSN, T1, T2, W, W2, N)
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector1);
+  DECL_VARIABLE_128BITS_VARIANTS(vector2);
+
+  clean_results ();
+
+  /* Fill input vector1 and vector2 with arbitrary values */
+  VDUP(vector1, q, int, s, 16, 8, 50*(UINT8_MAX+1));
+  VDUP(vector1, q, int, s, 32, 4, 50*(UINT16_MAX+1));
+  VDUP(vector1, q, int, s, 64, 2, 24*((uint64_t)UINT32_MAX+1));
+  VDUP(vector1, q, uint, u, 16, 8, 3*(UINT8_MAX+1));
+  VDUP(vector1, q, uint, u, 32, 4, 55*(UINT16_MAX+1));
+  VDUP(vector1, q, uint, u, 64, 2, 3*((uint64_t)UINT32_MAX+1));
+
+  VDUP(vector2, q, int, s, 16, 8, (uint16_t)UINT8_MAX);
+  VDUP(vector2, q, int, s, 32, 4, (uint32_t)UINT16_MAX);
+  VDUP(vector2, q, int, s, 64, 2, (uint64_t)UINT32_MAX);
+  VDUP(vector2, q, uint, u, 16, 8, (uint16_t)UINT8_MAX);
+  VDUP(vector2, q, uint, u, 32, 4, (uint32_t)UINT16_MAX);
+  VDUP(vector2, q, uint, u, 64, 2, (uint64_t)UINT32_MAX);
+
+  TEST_VXXXHN(INSN_NAME, int, s, 16, 8, 8);
+  TEST_VXXXHN(INSN_NAME, int, s, 32, 16, 4);
+  TEST_VXXXHN(INSN_NAME, int, s, 64, 32, 2);
+  TEST_VXXXHN(INSN_NAME, uint, u, 16, 8, 8);
+  TEST_VXXXHN(INSN_NAME, uint, u, 32, 16, 4);
+  TEST_VXXXHN(INSN_NAME, uint, u, 64, 32, 2);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c
@@ -0,0 +1,74 @@
+#define INSN_NAME vabs
+#define TEST_MSG "VABS/VABSQ"
+
+/* Extra tests for functions requiring floating-point types.  */
+void exec_vabs_f32(void);
+#define EXTRA_TESTS exec_vabs_f32
+
+#include "unary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd,
+				       0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9,
+					0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+					0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					  0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40133333, 0x40133333 };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x4059999a, 0x4059999a,
+						   0x4059999a, 0x4059999a };
+
+void exec_vabs_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, -2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc
@@ -0,0 +1,59 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vqdmlxl_n(vector, vector3, val),
+     then store the result.  */
+#define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##_##T2##W2(VECT_VAR(vector, T1, W, N),				\
+		    VECT_VAR(vector3, T1, W2, N),			\
+		    V);							\
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N),				\
+		VECT_VAR(vector_res, T1, W, N));			\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMLXL_N(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector3, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector3, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+
+  VDUP(vector3, , int, s, 16, 4, 0x55);
+  VDUP(vector3, , int, s, 32, 2, 0x55);
+
+  /* Choose val arbitrarily.  */
+  TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x22, expected_cumulative_sat, "");
+  TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x33, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+
+#define TEST_MSG2 "(check mul cumulative saturation)"
+  VDUP(vector3, , int, s, 16, 4, 0x8000);
+  VDUP(vector3, , int, s, 32, 2, 0x80000000);
+
+  TEST_VQDMLXL_N(INSN_NAME, int, s, 32, 16, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMLXL_N(INSN_NAME, int, s, 64, 32, 2, 0x80000000, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c
@@ -0,0 +1,18 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmlsl_lane
+#define TEST_MSG "VMLSL_LANE"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da,
+					0xffffc1db, 0xffffc1dc };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffc1d9,
+					0xffffffffffffc1da };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da,
+					 0xffffc1db, 0xffffc1dc };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffc1d9,
+					 0xffffffffffffc1da };
+
+#include "vmlXl_lane.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlsl.c
@@ -0,0 +1,22 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmlsl
+#define TEST_MSG "VMLSL"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x16d9, 0x16da, 0x16db, 0x16dc,
+					0x16dd, 0x16de, 0x16df, 0x16e0 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffc1d9, 0xffffc1da,
+					0xffffc1db, 0xffffc1dc };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffc1d9,
+					0xffffffffffffc1da };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xc1d9, 0xc1da, 0xc1db, 0xc1dc,
+					 0xc1dd, 0xc1de, 0xc1df, 0xc1e0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffc1d9, 0xffffc1da,
+					 0xffffc1db, 0xffffc1dc };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffc1d9,
+					 0xffffffffffffc1da };
+
+#include "vmlXl.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c
@@ -0,0 +1,23 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmla
+#define TEST_MSG "VMLA_LANE"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x3e07, 0x3e08 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3e07, 0x3e08 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4418c687, 0x44190687 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a,
+					0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a,
+					 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x441a3168, 0x441a7168,
+					   0x441ab168, 0x441af168 };
+
+#include "vmlX_lane.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c
@@ -0,0 +1,164 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vsri
+#define TEST_MSG "VSRI_N"
+
+/* Extra tests for functions requiring corner cases tests.  */
+void vsri_extra(void);
+#define EXTRA_TESTS vsri_extra
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
+				       0xf0, 0xf0, 0xf0, 0xf0 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x80000001, 0x80000001 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffff00000000 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5,
+					0xc5, 0xc5, 0xc5, 0xc5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xe000000000000000 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc5, 0xc5, 0xc5, 0xc5,
+					0xc5, 0xc5, 0xc5, 0xc5 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xffc0, 0xffc0, 0xffc0, 0xffc0 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
+					0xf7, 0xf7, 0xf7, 0xf7,
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+					0xfffd, 0xfffd, 0xfffd, 0xfffd };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff,
+					0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffff000000000000,
+					0xffff000000000000 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1,
+					 0xe1, 0xe1, 0xe1, 0xe1,
+					 0xe1, 0xe1, 0xe1, 0xe1,
+					 0xe1, 0xe1, 0xe1, 0xe1 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffe00, 0xfffffe00,
+					 0xfffffe00, 0xfffffe00 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffff800,
+					 0xfffffffffffff800 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xe1, 0xe1, 0xe1, 0xe1,
+					 0xe1, 0xe1, 0xe1, 0xe1,
+					 0xe1, 0xe1, 0xe1, 0xe1,
+					 0xe1, 0xe1, 0xe1, 0xe1 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results with max shift amount.  */
+VECT_VAR_DECL(expected_max_shift,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_max_shift,int,16,4) [] = { 0xfff0, 0xfff1,
+						  0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_max_shift,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_max_shift,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_max_shift,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						  0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_max_shift,uint,16,4) [] = { 0xfff0, 0xfff1,
+						   0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_max_shift,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_max_shift,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_max_shift,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						  0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected_max_shift,poly,16,4) [] = { 0xfff0, 0xfff1,
+						   0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected_max_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_max_shift,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						  0xf4, 0xf5, 0xf6, 0xf7,
+						  0xf8, 0xf9, 0xfa, 0xfb,
+						  0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_max_shift,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						  0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_max_shift,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						  0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_max_shift,int,64,2) [] = { 0xfffffffffffffff0,
+						  0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_max_shift,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						   0xf4, 0xf5, 0xf6, 0xf7,
+						   0xf8, 0xf9, 0xfa, 0xfb,
+						   0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_max_shift,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						   0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_max_shift,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						   0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_max_shift,uint,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_max_shift,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+						   0xf4, 0xf5, 0xf6, 0xf7,
+						   0xf8, 0xf9, 0xfa, 0xfb,
+						   0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected_max_shift,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						   0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_max_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						     0x33333333, 0x33333333 };
+
+#include "vsXi_n.inc"
+
+void vsri_extra(void)
+{
+  /* Test cases with maximum shift amount (this amount is different
+     from vsli).  */
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Fill input vector2 with arbitrary values.  */
+  VDUP(vector2, , int, s, 8, 8, 2);
+  VDUP(vector2, , int, s, 16, 4, -4);
+  VDUP(vector2, , int, s, 32, 2, 3);
+  VDUP(vector2, , int, s, 64, 1, 100);
+  VDUP(vector2, , uint, u, 8, 8, 20);
+  VDUP(vector2, , uint, u, 16, 4, 30);
+  VDUP(vector2, , uint, u, 32, 2, 40);
+  VDUP(vector2, , uint, u, 64, 1, 2);
+  VDUP(vector2, , poly, p, 8, 8, 20);
+  VDUP(vector2, , poly, p, 16, 4, 30);
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -20);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, 24);
+  VDUP(vector2, q, uint, u, 8, 16, 12);
+  VDUP(vector2, q, uint, u, 16, 8, 3);
+  VDUP(vector2, q, uint, u, 32, 4, 55);
+  VDUP(vector2, q, uint, u, 64, 2, 3);
+  VDUP(vector2, q, poly, p, 8, 16, 12);
+  VDUP(vector2, q, poly, p, 16, 8, 3);
+
+  /* Use maximum allowed shift amount.  */
+  TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 8);
+  TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 16);
+  TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 32);
+  TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 64);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 8);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 16);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 32);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 64);
+  TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 8);
+  TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 16);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 8);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 16);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 32);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 64);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 8);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 16);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 32);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 64);
+  TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 8);
+  TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 16);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
@@ -0,0 +1,86 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+				       0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#define TEST_MSG "VGET_HIGH"
+void exec_vget_high (void)
+{
+  /* Basic test: vec64=vget_high(vec128), then store the result.  */
+#define TEST_VGET_HIGH(T1, T2, W, N, N2)				\
+  VECT_VAR(vector64, T1, W, N) =					\
+    vget_high_##T2##W(VECT_VAR(vector128, T1, W, N2));			\
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector64, T1, W, N))
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector128);
+
+  TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+  VLOAD(vector128, buffer, q, float, f, 32, 4);
+
+  clean_results ();
+
+  /* Execute the tests.  */
+  TEST_VGET_HIGH(int, s, 8, 8, 16);
+  TEST_VGET_HIGH(int, s, 16, 4, 8);
+  TEST_VGET_HIGH(int, s, 32, 2, 4);
+  TEST_VGET_HIGH(int, s, 64, 1, 2);
+  TEST_VGET_HIGH(uint, u, 8, 8, 16);
+  TEST_VGET_HIGH(uint, u, 16, 4, 8);
+  TEST_VGET_HIGH(uint, u, 32, 2, 4);
+  TEST_VGET_HIGH(uint, u, 64, 1, 2);
+  TEST_VGET_HIGH(poly, p, 8, 8, 16);
+  TEST_VGET_HIGH(poly, p, 16, 4, 8);
+  TEST_VGET_HIGH(float, f, 32, 2, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vget_high ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddhn.c
@@ -0,0 +1,24 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+#define INSN_NAME vaddhn
+#define TEST_MSG "VADDHN"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x32, 0x32, 0x32, 0x32,
+				       0x32, 0x32, 0x32, 0x32 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x32, 0x32, 0x32, 0x32 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x18 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x3, 0x3, 0x3, 0x3,
+					0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x37, 0x37, 0x37, 0x37 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3, 0x3 };
+
+#include "vXXXhn.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddw.c
@@ -0,0 +1,51 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vaddw
+#define TEST_MSG "VADDW"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = {  0xffe3, 0xffe4, 0xffe5, 0xffe6,
+					 0xffe7, 0xffe8, 0xffe9, 0xffea };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3,
+					0xffffffe4, 0xffffffe5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0,
+					0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xe3, 0xe4, 0xe5, 0xe6,
+					 0xe7, 0xe8, 0xe9, 0xea };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe1, 0xffe2,
+					 0xffe3, 0xffe4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe0, 0xffffffe1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "vXXXw.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
@@ -0,0 +1,75 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7,
+					0xf8, 0xf9, 0xfa, 0xfb,
+					0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
+					0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
+					 0xfff3, 0xfff4, 0xfff5,
+					 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+					   0xc1600000, 0xc1500000 };
+
+#define TEST_MSG "VLD1/VLD1Q"
+void exec_vld1 (void)
+{
+  /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector).  */
+  /* This test actually tests vdl1 and vst1 at the same time.  */
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)				\
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
+
+  TEST_VLD1(vector, buffer, , float, f, 32, 2);
+  TEST_VLD1(vector, buffer, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vld1 ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c
@@ -0,0 +1,79 @@
+#define INSN_NAME vclt
+#define TEST_MSG "VCLT/VCLTQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff,
+					 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+					 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0x0,
+					     0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+						0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+						0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+						0x0, 0x0 };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+						 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0x0, 0x0 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c
@@ -0,0 +1,49 @@
+#define INSN_NAME vcalt
+#define TEST_MSG "VCALT/VCALTQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc
@@ -0,0 +1,139 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* In this case, output variables are arrays of vectors.  */
+#define DECL_VSHUFFLE(T1, W, N)						\
+  VECT_ARRAY_TYPE(T1, W, N, 2) VECT_ARRAY_VAR(result_vec, T1, W, N, 2);	\
+  VECT_VAR_DECL(result_bis, T1, W, N)[2 * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behaviour. The next
+     macro enables to move another chunk of data from result_bis to
+     result.  */
+#define TEST_VSHUFFLE(INSN, Q, T1, T2, W, N)				\
+  VECT_ARRAY_VAR(result_vec, T1, W, N, 2) =				\
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst2##Q##_##T2##W(VECT_VAR(result_bis, T1, W, N),			\
+		    VECT_ARRAY_VAR(result_vec, T1, W, N, 2));		\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis, T1, W, N),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[X].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X)					\
+  memcpy(VECT_VAR(result, T1, W, N), &(VECT_VAR(result_bis, T1, W, N)[X*N]), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+
+  /* We don't need 64 bits variants.  */
+#define DECL_ALL_VSHUFFLE()				\
+  DECL_VSHUFFLE(int, 8, 8);				\
+  DECL_VSHUFFLE(int, 16, 4);				\
+  DECL_VSHUFFLE(int, 32, 2);				\
+  DECL_VSHUFFLE(uint, 8, 8);				\
+  DECL_VSHUFFLE(uint, 16, 4);				\
+  DECL_VSHUFFLE(uint, 32, 2);				\
+  DECL_VSHUFFLE(poly, 8, 8);				\
+  DECL_VSHUFFLE(poly, 16, 4);				\
+  DECL_VSHUFFLE(float, 32, 2);				\
+  DECL_VSHUFFLE(int, 8, 16);				\
+  DECL_VSHUFFLE(int, 16, 8);				\
+  DECL_VSHUFFLE(int, 32, 4);				\
+  DECL_VSHUFFLE(uint, 8, 16);				\
+  DECL_VSHUFFLE(uint, 16, 8);				\
+  DECL_VSHUFFLE(uint, 32, 4);				\
+  DECL_VSHUFFLE(poly, 8, 16);				\
+  DECL_VSHUFFLE(poly, 16, 8);				\
+  DECL_VSHUFFLE(float, 32, 4)
+
+  DECL_ALL_VSHUFFLE();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose arbitrary initialization values.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x66);
+  VDUP(vector2, , uint, u, 32, 2, 0x77);
+  VDUP(vector2, , poly, p, 8, 8, 0x55);
+  VDUP(vector2, , poly, p, 16, 4, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.6f);
+
+  VDUP(vector2, q, int, s, 8, 16, 0x11);
+  VDUP(vector2, q, int, s, 16, 8, 0x22);
+  VDUP(vector2, q, int, s, 32, 4, 0x33);
+  VDUP(vector2, q, uint, u, 8, 16, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x66);
+  VDUP(vector2, q, uint, u, 32, 4, 0x77);
+  VDUP(vector2, q, poly, p, 8, 16, 0x55);
+  VDUP(vector2, q, poly, p, 16, 8, 0x66);
+  VDUP(vector2, q, float, f, 32, 4, 33.8f);
+  
+#define TEST_ALL_VSHUFFLE(INSN)				\
+  TEST_VSHUFFLE(INSN, , int, s, 8, 8);			\
+  TEST_VSHUFFLE(INSN, , int, s, 16, 4);			\
+  TEST_VSHUFFLE(INSN, , int, s, 32, 2);			\
+  TEST_VSHUFFLE(INSN, , uint, u, 8, 8);			\
+  TEST_VSHUFFLE(INSN, , uint, u, 16, 4);		\
+  TEST_VSHUFFLE(INSN, , uint, u, 32, 2);		\
+  TEST_VSHUFFLE(INSN, , poly, p, 8, 8);			\
+  TEST_VSHUFFLE(INSN, , poly, p, 16, 4);		\
+  TEST_VSHUFFLE(INSN, , float, f, 32, 2);		\
+  TEST_VSHUFFLE(INSN, q, int, s, 8, 16);		\
+  TEST_VSHUFFLE(INSN, q, int, s, 16, 8);		\
+  TEST_VSHUFFLE(INSN, q, int, s, 32, 4);		\
+  TEST_VSHUFFLE(INSN, q, uint, u, 8, 16);		\
+  TEST_VSHUFFLE(INSN, q, uint, u, 16, 8);		\
+  TEST_VSHUFFLE(INSN, q, uint, u, 32, 4);		\
+  TEST_VSHUFFLE(INSN, q, poly, p, 8, 16);		\
+  TEST_VSHUFFLE(INSN, q, poly, p, 16, 8);		\
+  TEST_VSHUFFLE(INSN, q, float, f, 32, 4)
+
+#define TEST_ALL_EXTRA_CHUNKS()			\
+  TEST_EXTRA_CHUNK(int, 8, 8, 1);		\
+  TEST_EXTRA_CHUNK(int, 16, 4, 1);		\
+  TEST_EXTRA_CHUNK(int, 32, 2, 1);		\
+  TEST_EXTRA_CHUNK(uint, 8, 8, 1);		\
+  TEST_EXTRA_CHUNK(uint, 16, 4, 1);		\
+  TEST_EXTRA_CHUNK(uint, 32, 2, 1);		\
+  TEST_EXTRA_CHUNK(poly, 8, 8, 1);		\
+  TEST_EXTRA_CHUNK(poly, 16, 4, 1);		\
+  TEST_EXTRA_CHUNK(float, 32, 2, 1);		\
+  TEST_EXTRA_CHUNK(int, 8, 16, 1);		\
+  TEST_EXTRA_CHUNK(int, 16, 8, 1);		\
+  TEST_EXTRA_CHUNK(int, 32, 4, 1);		\
+  TEST_EXTRA_CHUNK(uint, 8, 16, 1);		\
+  TEST_EXTRA_CHUNK(uint, 16, 8, 1);		\
+  TEST_EXTRA_CHUNK(uint, 32, 4, 1);		\
+  TEST_EXTRA_CHUNK(poly, 8, 16, 1);		\
+  TEST_EXTRA_CHUNK(poly, 16, 8, 1);		\
+  TEST_EXTRA_CHUNK(float, 32, 4, 1)
+
+  clean_results ();
+
+  /* Execute the tests.  */
+  TEST_ALL_VSHUFFLE(INSN_NAME);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected0, "(chunk 0)");
+
+  TEST_ALL_EXTRA_CHUNKS();
+  CHECK_RESULTS_NAMED (TEST_MSG, expected1, "(chunk 1)");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c
@@ -0,0 +1,230 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
+				       0xe8, 0xea, 0xec, 0xee };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
+					0xe8, 0xea, 0xec, 0xee };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333,
+					 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
+					0x80, 0xa0, 0xc0, 0xe0,
+					0x0, 0x20, 0x40, 0x60,
+					0x80, 0xa0, 0xc0, 0xe0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
+					0x4000, 0x5000, 0x6000, 0x7000 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x40000000,
+					0x80000000, 0xc0000000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
+					 0x80, 0xa0, 0xc0, 0xe0,
+					 0x0, 0x20, 0x40, 0x60,
+					 0x80, 0xa0, 0xc0, 0xe0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
+					 0x4000, 0x5000, 0x6000, 0x7000 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000,
+					 0x80000000, 0xc0000000 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results with large shift amount.  */
+VECT_VAR_DECL(expected_large_shift,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						   0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_large_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+						    0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_large_shift,poly,16,4) [] = { 0x3333, 0x3333,
+						     0x3333, 0x3333 };
+VECT_VAR_DECL(expected_large_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_large_shift,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						    0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,64,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						     0x0, 0x0, 0x0, 0x0,
+						     0x0, 0x0, 0x0, 0x0,
+						     0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						     0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,64,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						     0x33, 0x33, 0x33, 0x33,
+						     0x33, 0x33, 0x33, 0x33,
+						     0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_large_shift,poly,16,8) [] = { 0x3333, 0x3333,
+						     0x3333, 0x3333,
+						     0x3333, 0x3333,
+						     0x3333, 0x3333 };
+VECT_VAR_DECL(expected_large_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						       0x33333333, 0x33333333 };
+
+
+/* Expected results with negative shift amount.  */
+VECT_VAR_DECL(expected_negative_shift,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
+						      0xfa, 0xfa, 0xfb, 0xfb };
+VECT_VAR_DECL(expected_negative_shift,int,16,4) [] = { 0xfff8, 0xfff8,
+						       0xfff9, 0xfff9  };
+VECT_VAR_DECL(expected_negative_shift,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
+VECT_VAR_DECL(expected_negative_shift,int,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
+						       0x7a, 0x7a, 0x7b, 0x7b };
+VECT_VAR_DECL(expected_negative_shift,uint,16,4) [] = { 0x7ff8, 0x7ff8,
+							0x7ff9, 0x7ff9 };
+VECT_VAR_DECL(expected_negative_shift,uint,32,2) [] = { 0x3ffffffc,
+							0x3ffffffc };
+VECT_VAR_DECL(expected_negative_shift,uint,64,1) [] = { 0xfffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+						       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_negative_shift,poly,16,4) [] = { 0x3333, 0x3333,
+							0x3333, 0x3333 };
+VECT_VAR_DECL(expected_negative_shift,hfloat,32,2) [] = { 0x33333333,
+							  0x33333333 };
+VECT_VAR_DECL(expected_negative_shift,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc,
+						       0xfd, 0xfd, 0xfd, 0xfd,
+						       0xfe, 0xfe, 0xfe, 0xfe,
+						       0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_negative_shift,int,16,8) [] = { 0xffff, 0xffff,
+						       0xffff, 0xffff,
+						       0xffff, 0xffff,
+						       0xffff, 0xffff };
+VECT_VAR_DECL(expected_negative_shift,int,32,4) [] = {  0xfffffffe, 0xfffffffe,
+							0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL(expected_negative_shift,int,64,2) [] = { 0xffffffffffffffff,
+						       0xffffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c,
+							0x3d, 0x3d, 0x3d, 0x3d,
+							0x3e, 0x3e, 0x3e, 0x3e,
+							0x3f, 0x3f, 0x3f, 0x3f };
+VECT_VAR_DECL(expected_negative_shift,uint,16,8) [] = { 0x7ff, 0x7ff,
+							0x7ff, 0x7ff,
+							0x7ff, 0x7ff,
+							0x7ff, 0x7ff };
+VECT_VAR_DECL(expected_negative_shift,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe,
+							0x1ffffffe, 0x1ffffffe };
+VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff,
+							0x7ffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+							0x33, 0x33, 0x33, 0x33,
+							0x33, 0x33, 0x33, 0x33,
+							0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_negative_shift,poly,16,8) [] = { 0x3333, 0x3333,
+							0x3333, 0x3333,
+							0x3333, 0x3333,
+							0x3333, 0x3333 };
+VECT_VAR_DECL(expected_negative_shift,hfloat,32,4) [] = { 0x33333333,
+							  0x33333333,
+							  0x33333333,
+							  0x33333333 };
+
+
+#ifndef INSN_NAME
+#define INSN_NAME vshl
+#define TEST_MSG "VSHL/VSHLQ"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: v3=vshl(v1,v2), then store the result.  */
+#define TEST_VSHL(T3, Q, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector_shift, T3, W, N));		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Choose init value arbitrarily, will be used as shift amount.  */
+  VDUP(vector_shift, , int, s, 8, 8, 1);
+  VDUP(vector_shift, , int, s, 16, 4, 3);
+  VDUP(vector_shift, , int, s, 32, 2, 8);
+  VDUP(vector_shift, , int, s, 64, 1, 3);
+  VDUP(vector_shift, q, int, s, 8, 16, 5);
+  VDUP(vector_shift, q, int, s, 16, 8, 12);
+  VDUP(vector_shift, q, int, s, 32, 4, 30);
+  VDUP(vector_shift, q, int, s, 64, 2, 63);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+
+  /* Test large shift amount (larger or equal to the type width.  */
+  VDUP(vector_shift, , int, s, 8, 8, 8);
+  VDUP(vector_shift, , int, s, 16, 4, 16);
+  VDUP(vector_shift, , int, s, 32, 2, 32);
+  VDUP(vector_shift, , int, s, 64, 1, 64);
+  VDUP(vector_shift, q, int, s, 8, 16, 8);
+  VDUP(vector_shift, q, int, s, 16, 8, 17);
+  VDUP(vector_shift, q, int, s, 32, 4, 33);
+  VDUP(vector_shift, q, int, s, 64, 2, 65);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_large_shift, "(large shift amount)");
+
+
+  /* Test negative shift amount. */
+  VDUP(vector_shift, , int, s, 8, 8, -1);
+  VDUP(vector_shift, , int, s, 16, 4, -1);
+  VDUP(vector_shift, , int, s, 32, 2, -2);
+  VDUP(vector_shift, , int, s, 64, 1, -4);
+  VDUP(vector_shift, q, int, s, 8, 16, -2);
+  VDUP(vector_shift, q, int, s, 16, 8, -5);
+  VDUP(vector_shift, q, int, s, 32, 4, -3);
+  VDUP(vector_shift, q, int, s, 64, 2, -5);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_negative_shift, "(negative shift amount)");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc
@@ -0,0 +1,123 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMLX(T, W, N)			\
+  DECL_VARIABLE(vector1, T, W, N);		\
+  DECL_VARIABLE(vector2, T, W, N);		\
+  DECL_VARIABLE(vector3, T, W, N);		\
+  DECL_VARIABLE(vector_res, T, W, N)
+
+  /* vector_res = vmla(vector, vector3, vector4),
+     then store the result.  */
+#define TEST_VMLX1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =                                      \
+    INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N),			\
+		      VECT_VAR(vector3, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),                         \
+		    VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLX(INSN, Q, T1, T2, W, N)	\
+  TEST_VMLX1(INSN, Q, T1, T2, W, N)
+
+  DECL_VMLX(int, 8, 8);
+  DECL_VMLX(int, 16, 4);
+  DECL_VMLX(int, 32, 2);
+  DECL_VMLX(uint, 8, 8);
+  DECL_VMLX(uint, 16, 4);
+  DECL_VMLX(uint, 32, 2);
+  DECL_VMLX(float, 32, 2);
+  DECL_VMLX(int, 8, 16);
+  DECL_VMLX(int, 16, 8);
+  DECL_VMLX(int, 32, 4);
+  DECL_VMLX(uint, 8, 16);
+  DECL_VMLX(uint, 16, 8);
+  DECL_VMLX(uint, 32, 4);
+  DECL_VMLX(float, 32, 4);
+
+  clean_results ();
+
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x44);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.1f);
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, float, f, 32, 4, 99.2f);
+
+  VDUP(vector3, , int, s, 8, 8, 0xFF);
+  VDUP(vector3, , int, s, 16, 4, 0xEE);
+  VDUP(vector3, , int, s, 32, 2, 0xDD);
+  VDUP(vector3, , uint, u, 8, 8, 0xCC);
+  VDUP(vector3, , uint, u, 16, 4, 0xBB);
+  VDUP(vector3, , uint, u, 32, 2, 0xAA);
+  VDUP(vector3, , float, f, 32, 2, 10.23f);
+  VDUP(vector3, q, int, s, 8, 16, 0x99);
+  VDUP(vector3, q, int, s, 16, 8, 0x88);
+  VDUP(vector3, q, int, s, 32, 4, 0x77);
+  VDUP(vector3, q, uint, u, 8, 16, 0x66);
+  VDUP(vector3, q, uint, u, 16, 8, 0x55);
+  VDUP(vector3, q, uint, u, 32, 4, 0x44);
+  VDUP(vector3, q, float, f, 32, 4, 77.8f);
+
+  TEST_VMLX(INSN_NAME, , int, s, 8, 8);
+  TEST_VMLX(INSN_NAME, , int, s, 16, 4);
+  TEST_VMLX(INSN_NAME, , int, s, 32, 2);
+  TEST_VMLX(INSN_NAME, , uint, u, 8, 8);
+  TEST_VMLX(INSN_NAME, , uint, u, 16, 4);
+  TEST_VMLX(INSN_NAME, , uint, u, 32, 2);
+  TEST_VMLX(INSN_NAME, , float, f, 32, 2);
+  TEST_VMLX(INSN_NAME, q, int, s, 8, 16);
+  TEST_VMLX(INSN_NAME, q, int, s, 16, 8);
+  TEST_VMLX(INSN_NAME, q, int, s, 32, 4);
+  TEST_VMLX(INSN_NAME, q, uint, u, 8, 16);
+  TEST_VMLX(INSN_NAME, q, uint, u, 16, 8);
+  TEST_VMLX(INSN_NAME, q, uint, u, 32, 4);
+  TEST_VMLX(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
@@ -0,0 +1,100 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
+				       0xf1, 0xf1, 0xf1, 0xf1 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
+					0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
+					0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					0xf2, 0xf2, 0xf2, 0xf2,
+					0xf2, 0xf2, 0xf2, 0xf2,
+					0xf2, 0xf2, 0xf2, 0xf2 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3,
+					0xfff3, 0xfff3, 0xfff3, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
+					0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
+					0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
+					 0xf5, 0xf5, 0xf5, 0xf5,
+					 0xf5, 0xf5, 0xf5, 0xf5,
+					 0xf5, 0xf5, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
+					 0xf5, 0xf5, 0xf5, 0xf5,
+					 0xf5, 0xf5, 0xf5, 0xf5,
+					 0xf5, 0xf5, 0xf5, 0xf5 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
+					 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
+					   0xc1700000, 0xc1700000 };
+
+#define TEST_MSG "VDUP_LANE/VDUP_LANEQ"
+void exec_vdup_lane (void)
+{
+  /* Basic test: vec1=vdup_lane(vec2, lane), then store the result.  */
+#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vdup##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N2), L);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  /* Input vector can only have 64 bits.  */
+  DECL_VARIABLE_64BITS_VARIANTS(vector);
+
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VDUP_LANE(, int, s, 8, 8, 8, 1);
+  TEST_VDUP_LANE(, int, s, 16, 4, 4, 2);
+  TEST_VDUP_LANE(, int, s, 32, 2, 2, 1);
+  TEST_VDUP_LANE(, int, s, 64, 1, 1, 0);
+  TEST_VDUP_LANE(, uint, u, 8, 8, 8, 7);
+  TEST_VDUP_LANE(, uint, u, 16, 4, 4, 3);
+  TEST_VDUP_LANE(, uint, u, 32, 2, 2, 1);
+  TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0);
+  TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7);
+  TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3);
+  TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
+
+  TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
+  TEST_VDUP_LANE(q, int, s, 16, 8, 4, 3);
+  TEST_VDUP_LANE(q, int, s, 32, 4, 2, 1);
+  TEST_VDUP_LANE(q, int, s, 64, 2, 1, 0);
+  TEST_VDUP_LANE(q, uint, u, 8, 16, 8, 5);
+  TEST_VDUP_LANE(q, uint, u, 16, 8, 4, 1);
+  TEST_VDUP_LANE(q, uint, u, 32, 4, 2, 0);
+  TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0);
+  TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5);
+  TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1);
+  TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vdup_lane ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c
@@ -0,0 +1,27 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vqdmlal_n
+#define TEST_MSG "VQDMLAL_N"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x1684, 0x1685, 0x1686, 0x1687 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x21ce, 0x21cf };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,64,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffef, 0x7ffffff0,
+					 0x7ffffff1, 0x7ffffff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fffffffffffffef,
+					 0x7ffffffffffffff0 };
+
+#include "vqdmlXl_n.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclz.c
@@ -0,0 +1,194 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x11, 0x11 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x5, 0x5 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2,
+					0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
+					 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xd, 0xd, 0xd, 0xd,
+					 0xd, 0xd, 0xd, 0xd };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1f, 0x1f, 0x1f, 0x1f };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+
+/* Expected results with input=0.  */
+VECT_VAR_DECL(expected_with_0,int,8,8) [] = { 0x8, 0x8, 0x8, 0x8,
+					      0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,int,16,4) [] = { 0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,int,32,2) [] = { 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,uint,8,8) [] = { 0x8, 0x8, 0x8, 0x8,
+					       0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,uint,16,4) [] = { 0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,uint,32,2) [] = { 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_0,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_0,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_with_0,int,8,16) [] = { 0x8, 0x8, 0x8, 0x8,
+					       0x8, 0x8, 0x8, 0x8,
+					       0x8, 0x8, 0x8, 0x8,
+					       0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10,
+					       0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,int,32,4) [] = { 0x20, 0x20, 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,int,64,2) [] = { 0x3333333333333333,
+					       0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,uint,8,16) [] = { 0x8, 0x8, 0x8, 0x8,
+						0x8, 0x8, 0x8, 0x8,
+						0x8, 0x8, 0x8, 0x8,
+						0x8, 0x8, 0x8, 0x8 };
+VECT_VAR_DECL(expected_with_0,uint,16,8) [] = { 0x10, 0x10, 0x10, 0x10,
+						0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected_with_0,uint,32,4) [] = { 0x20, 0x20, 0x20, 0x20 };
+VECT_VAR_DECL(expected_with_0,uint,64,2) [] = { 0x3333333333333333,
+						0x3333333333333333 };
+VECT_VAR_DECL(expected_with_0,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33,
+						0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_with_0,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+						0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_with_0,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						  0x33333333, 0x33333333 };
+
+#define INSN_NAME vclz
+#define TEST_MSG "VCLZ/VCLZQ"
+
+#define FNNAME1(NAME) void exec_ ## NAME (void)
+#define FNNAME(NAME) FNNAME1(NAME)
+
+FNNAME (INSN_NAME)
+{
+  /* Basic test: y=vclz(x), then store the result.  */
+#define TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_UNARY_OP(INSN, Q, T1, T2, W, N)	\
+  TEST_UNARY_OP1(INSN, Q, T1, T2, W, N)		\
+
+  /* No need for 64 bits variants */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 8, 8);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector, uint, 8, 16);
+  DECL_VARIABLE(vector, uint, 16, 8);
+  DECL_VARIABLE(vector, uint, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+  DECL_VARIABLE(vector_res, uint, 16, 8);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  clean_results ();
+
+  /* Fill input vector with arbitrary values.  */
+  VDUP(vector, , int, s, 8, 8, 0x84);
+  VDUP(vector, , int, s, 16, 4, 0x1234);
+  VDUP(vector, , int, s, 32, 2, 0x5678);
+  VDUP(vector, , uint, u, 8, 8, 0x34);
+  VDUP(vector, , uint, u, 16, 4, 0x8234);
+  VDUP(vector, , uint, u, 32, 2, 0x7654321);
+  VDUP(vector, q, int, s, 8, 16, 0x34);
+  VDUP(vector, q, int, s, 16, 8, 0x1234);
+  VDUP(vector, q, int, s, 32, 4, 0x12345678);
+  VDUP(vector, q, uint, u, 8, 16, 0x13);
+  VDUP(vector, q, uint, u, 16, 8, 0x4);
+  VDUP(vector, q, uint, u, 32, 4, 0x1);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+  /* Test with zero as input.  */
+  VDUP(vector, , int, s, 8, 8, 0);
+  VDUP(vector, , int, s, 16, 4, 0);
+  VDUP(vector, , int, s, 32, 2, 0);
+  VDUP(vector, , uint, u, 8, 8, 0);
+  VDUP(vector, , uint, u, 16, 4, 0);
+  VDUP(vector, , uint, u, 32, 2, 0);
+  VDUP(vector, q, int, s, 8, 16, 0);
+  VDUP(vector, q, int, s, 16, 8, 0);
+  VDUP(vector, q, int, s, 32, 4, 0);
+  VDUP(vector, q, uint, u, 8, 16, 0);
+  VDUP(vector, q, uint, u, 16, 8, 0);
+  VDUP(vector, q, uint, u, 32, 4, 0);
+
+  /* Apply a unary operator named INSN_NAME.  */
+  TEST_UNARY_OP(INSN_NAME, , int, s, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , int, s, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 8, 8);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 16, 4);
+  TEST_UNARY_OP(INSN_NAME, , uint, u, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, int, s, 32, 4);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 8, 16);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 16, 8);
+  TEST_UNARY_OP(INSN_NAME, q, uint, u, 32, 4);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_with_0, " (input=0)");
+}
+
+int main (void)
+{
+  exec_vclz ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c
@@ -0,0 +1,122 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+					0xffff, 0xffff, 0xffff, 0xffff };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff,
+					0xffffffff, 0xffffffff };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
+					0x7fff, 0x7fff, 0x7fff, 0x7fff };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+					0x7fffffff, 0x7fffffff };
+
+#define INSN_NAME vqdmulh
+#define TEST_MSG "VQDMULH"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vqdmulh(vector,vector2,lane), then store the result.  */
+#define TEST_VQDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N));			\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQDMULH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMULH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)	\
+  TEST_VQDMULH1(INSN_NAME, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, int, 16, 8);
+  DECL_VARIABLE(vector2, int, 32, 4);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x55);
+  VDUP(vector2, , int, s, 32, 2, 0xBB);
+  VDUP(vector2, q, int, s, 16, 8, 0x33);
+  VDUP(vector2, q, int, s, 32, 4, 0x22);
+
+  TEST_VQDMULH(, int, s, 16, 4, expected_cumulative_sat, "");
+  TEST_VQDMULH(, int, s, 32, 2, expected_cumulative_sat, "");
+  TEST_VQDMULH(q, int, s, 16, 8, expected_cumulative_sat, "");
+  TEST_VQDMULH(q, int, s, 32, 4, expected_cumulative_sat, "");
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, "");
+
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector2, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+  VDUP(vector2, q, int, s, 32, 4, 0x80000000);
+
+#define TEST_MSG2 "with saturation"
+  TEST_VQDMULH(, int, s, 16, 4, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH(, int, s, 32, 2, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH(q, int, s, 16, 8, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH(q, int, s, 32, 4, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbic.c
@@ -0,0 +1,46 @@
+#define INSN_NAME vbic
+#define TEST_MSG "VBIC/VBICQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
+				       0xf4, 0xf5, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x0, 0x1, 0x2, 0x3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff90 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe1, 0xe2, 0xe3,
+					0xe0, 0xe1, 0xe2, 0xe3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe0, 0xffe1, 0xffe0, 0xffe1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd0, 0xffffffd1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x1, 0x0, 0x1,
+					0x0, 0x1, 0x0, 0x1,
+					0x8, 0x9, 0x8, 0x9,
+					0x8, 0x9, 0x8, 0x9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x11, 0x12, 0x13,
+					0x10, 0x11, 0x12, 0x13 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0x11, 0x10, 0x11 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0, 0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
+					 0xfff4, 0xfff4, 0xfff4, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc0, 0xffffffc0,
+					 0xffffffc0, 0xffffffc0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
@@ -0,0 +1,54 @@
+# Copyright (C) 2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARM or AArch64 target.
+if {![istarget arm*-*-*]
+    && ![istarget aarch64*-*-*]} then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+
+dg-init
+
+torture-init
+set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
+
+# Make sure Neon flags are provided, if necessary.
+set additional_flags [add_options_for_arm_neon ""]
+
+# Main loop.
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
+    # If we're only testing specific files and this isn't one of them, skip it.
+    if ![runtest_file_p $runtests $src] then {
+	continue
+    }
+
+    c-torture-execute $src $additional_flags
+    gcc-dg-runtest $src $additional_flags
+}
+
+# All done.
+torture-finish
+dg-finish
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c
@@ -0,0 +1,86 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x200, 0x1c2, 0x188, 0x152 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x200, 0x1c2 };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+					 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x7fffffffffffffff,
+					 0x7fffffffffffffff };
+
+#define INSN_NAME vqdmull
+#define TEST_MSG "VQDMULL"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=vqdmull(x,x), then store the result.  */
+#define TEST_VQDMULL2(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)	\
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N));	\
+  VECT_VAR(vector_res, T1, W2, N) =				\
+    INSN##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		   VECT_VAR(vector2, T1, W, N));		\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N),			\
+		 VECT_VAR(vector_res, T1, W2, N));		\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQDMULL1(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)	\
+  TEST_VQDMULL2(INSN, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMULL(T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)	\
+  TEST_VQDMULL1(INSN_NAME, T1, T2, W, W2, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector2, buffer, , int, s, 16, 4);
+  VLOAD(vector2, buffer, , int, s, 32, 2);
+
+  TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat, "");
+  TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat, "");
+
+  CHECK (TEST_MSG, int, 32, 4, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 64, 2, PRIx32, expected, "");
+
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+
+#define TEST_MSG2 "with saturation"
+  TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK (TEST_MSG, int, 32, 4, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 64, 2, PRIx32, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabal.c
@@ -0,0 +1,161 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff6, 0xfff7, 0xfff8, 0xfff9,
+					0xfffa, 0xfffb, 0xfffc, 0xfffd };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x20, 0x21 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x53, 0x54, 0x55, 0x56,
+					 0x57, 0x58, 0x59, 0x5a };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x907, 0x908, 0x909, 0x90a };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe7,
+					 0xffffffe8 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results for cases with input values chosen to test
+   possible intermediate overflow.  */
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2,
+					 0xf3, 0xf4, 0xf5, 0xf6 };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xffef, 0xfff0, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xffffffef, 0xfffffff0 };
+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xee, 0xef, 0xf0, 0xf1,
+					  0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffe2, 0xffe3, 0xffe4, 0xffe5 };
+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xffffffe7, 0xffffffe8 };
+
+#define TEST_MSG "VABAL"
+void exec_vabal (void)
+{
+  /* Basic test: v4=vabal(v1,v2,v3), then store the result.  */
+#define TEST_VABAL(T1, T2, W, W2, N)					\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    vabal_##T2##W(VECT_VAR(vector1, T1, W2, N),				\
+		  VECT_VAR(vector2, T1, W, N),				\
+		  VECT_VAR(vector3, T1, W, N));				\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define DECL_VABAL_VAR_LONG(VAR)		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, int, 64, 2);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VABAL_VAR_SHORT(VAR)		\
+  DECL_VARIABLE(VAR, int, 8, 8);		\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 8, 8);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2)
+
+  DECL_VABAL_VAR_LONG(vector1);
+  DECL_VABAL_VAR_SHORT(vector2);
+  DECL_VABAL_VAR_SHORT(vector3);
+  DECL_VABAL_VAR_LONG(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, int, s, 64, 2);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 64, 2);
+
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+  VDUP(vector3, , int, s, 16, 4, 25);
+  VDUP(vector3, , int, s, 32, 2, -40);
+  VDUP(vector3, , uint, u, 8, 8, 100);
+  VDUP(vector3, , uint, u, 16, 4, 2340);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+
+  /* Execute the tests.  */
+  TEST_VABAL(int, s, 8, 16, 8);
+  TEST_VABAL(int, s, 16, 32, 4);
+  TEST_VABAL(int, s, 32, 64, 2);
+  TEST_VABAL(uint, u, 8, 16, 8);
+  TEST_VABAL(uint, u, 16, 32, 4);
+  TEST_VABAL(uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+  /* Use values that could lead to overflow intermediate
+   * calculations.  */
+  VDUP(vector2, , int, s, 8, 8, 0x80);
+  VDUP(vector2, , int, s, 16, 4, 0x8000);
+  VDUP(vector2, , int, s, 32, 2, 0x80000000);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  VDUP(vector3, , int, s, 8, 8, 0x7f);
+  VDUP(vector3, , int, s, 16, 4, 0x7fff);
+  VDUP(vector3, , int, s, 32, 2, 0x7fffffff);
+  VDUP(vector3, , uint, u, 8, 8, 0xff);
+  VDUP(vector3, , uint, u, 16, 4, 0xffff);
+  VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+
+  TEST_VABAL(int, s, 8, 16, 8);
+  TEST_VABAL(int, s, 16, 32, 4);
+  TEST_VABAL(int, s, 32, 64, 2);
+  TEST_VABAL(uint, u, 8, 16, 8);
+  TEST_VABAL(uint, u, 16, 32, 4);
+  TEST_VABAL(uint, u, 32, 64, 2);
+
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2, " test intermediate overflow");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected2, " test intermediate overflow");
+}
+
+int main (void)
+{
+  exec_vabal ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c
@@ -0,0 +1,34 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vhadd
+#define TEST_MSG "VHADD/VHADDQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3,
+				       0xf3, 0xf4, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3,
+					0xf3, 0xf4, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf3, 0xf3,
+					0xf4, 0xf4, 0xf5, 0xf5,
+					0xf6, 0xf6, 0xf7, 0xf7,
+					0xf8, 0xf8, 0xf9, 0xf9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3,
+					0xfff3, 0xfff4, 0xfff4, 0xfff5 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff1, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xf5, 0xf5, 0xf6,
+					 0xf6, 0xf7, 0xf7, 0xf8,
+					 0xf8, 0xf9, 0xf9, 0xfa,
+					 0xfa, 0xfb, 0xfb, 0xfc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2,
+					 0xfff3, 0xfff3, 0xfff4, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					 0xfffffff1, 0xfffffff2 };
+
+#include "binary_op_no64.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c
@@ -0,0 +1,123 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x11, 0x11, 0x11,
+				       0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff3, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x33 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
+					0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0x77 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55,
+					0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11,
+					0x11, 0x11, 0x11, 0x11,
+					0x11, 0x11, 0x11, 0x11,
+					0x11, 0x11, 0x11, 0x11 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff7, 0x22, 0x22, 0x22,
+					0x22, 0x22, 0x22, 0x22 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff3, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x44 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+					 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
+					 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff3, 0x77, 0x77, 0x77 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff1, 0x88 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+					 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55,
+					 0x55, 0x55, 0x55, 0x55 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66,
+					 0x66, 0x66, 0x66, 0x66 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd,
+					   0x4204cccd, 0x4204cccd };
+
+#define TEST_MSG "VEXT/VEXTQ"
+void exec_vext (void)
+{
+  /* vector_res = vext(vector1,vector2,offset), then store the result.  */
+#define TEST_VEXT(Q, T1, T2, W, N, V)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vext##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N),			\
+		      V);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector1);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose arbitrary initialization values.  */
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , int, s, 64, 1, 0x44);
+  VDUP(vector2, , uint, u, 8, 8, 0x55);
+  VDUP(vector2, , uint, u, 16, 4, 0x66);
+  VDUP(vector2, , uint, u, 32, 2, 0x77);
+  VDUP(vector2, , uint, u, 64, 1, 0x88);
+  VDUP(vector2, , poly, p, 8, 8, 0x55);
+  VDUP(vector2, , poly, p, 16, 4, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.6f);
+
+  VDUP(vector2, q, int, s, 8, 16, 0x11);
+  VDUP(vector2, q, int, s, 16, 8, 0x22);
+  VDUP(vector2, q, int, s, 32, 4, 0x33);
+  VDUP(vector2, q, int, s, 64, 2, 0x44);
+  VDUP(vector2, q, uint, u, 8, 16, 0x55);
+  VDUP(vector2, q, uint, u, 16, 8, 0x66);
+  VDUP(vector2, q, uint, u, 32, 4, 0x77);
+  VDUP(vector2, q, uint, u, 64, 2, 0x88);
+  VDUP(vector2, q, poly, p, 8, 16, 0x55);
+  VDUP(vector2, q, poly, p, 16, 8, 0x66);
+  VDUP(vector2, q, float, f, 32, 4, 33.2f);
+
+  /* Choose arbitrary extract offsets.  */
+  TEST_VEXT(, int, s, 8, 8, 7);
+  TEST_VEXT(, int, s, 16, 4, 3);
+  TEST_VEXT(, int, s, 32, 2, 1);
+  TEST_VEXT(, int, s, 64, 1, 0);
+  TEST_VEXT(, uint, u, 8, 8, 6);
+  TEST_VEXT(, uint, u, 16, 4, 2);
+  TEST_VEXT(, uint, u, 32, 2, 1);
+  TEST_VEXT(, uint, u, 64, 1, 0);
+  TEST_VEXT(, poly, p, 8, 8, 6);
+  TEST_VEXT(, poly, p, 16, 4, 2);
+  TEST_VEXT(, float, f, 32, 2, 1);
+
+  TEST_VEXT(q, int, s, 8, 16, 14);
+  TEST_VEXT(q, int, s, 16, 8, 7);
+  TEST_VEXT(q, int, s, 32, 4, 3);
+  TEST_VEXT(q, int, s, 64, 2, 1);
+  TEST_VEXT(q, uint, u, 8, 16, 12);
+  TEST_VEXT(q, uint, u, 16, 8, 6);
+  TEST_VEXT(q, uint, u, 32, 4, 3);
+  TEST_VEXT(q, uint, u, 64, 2, 1);
+  TEST_VEXT(q, poly, p, 8, 16, 12);
+  TEST_VEXT(q, poly, p, 16, 8, 6);
+  TEST_VEXT(q, float, f, 32, 4, 3);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vext ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c
@@ -0,0 +1,32 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vhsub
+#define TEST_MSG "VHSUB/VHSUBQ"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xfe, 0xff, 0xff, 0x0,
+				       0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xff, 0xff, 0x0,
+					0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xfe, 0xff, 0xff,
+					0x0, 0x0, 0x1, 0x1,
+					0x2, 0x2, 0x3, 0x3,
+					0x4, 0x4, 0x5, 0x5 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffe, 0xffff, 0xffff, 0x0,
+					0x0, 0x1, 0x1, 0x2 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfb, 0xfc, 0xfc, 0xfd,
+					 0xfd, 0xfe, 0xfe, 0xff,
+					 0xff, 0x0, 0x0, 0x1,
+					 0x1, 0x2, 0x2, 0x3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0x0, 0x0,
+					 0x1, 0x1, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 };
+
+#include "binary_op_no64.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c
@@ -0,0 +1,14 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmlal_n
+#define TEST_MSG "VMLAL_N"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x595, 0x596, 0x597, 0x598 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xb3a, 0xb3b };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x10df, 0x10e0 };
+
+#include "vmlXl_n.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c
@@ -0,0 +1,20 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+
+#define INSN_NAME vpmin
+#define TEST_MSG "VPMIN"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6,
+				       0xf0, 0xf2, 0xf4, 0xf6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6,
+					0xf0, 0xf2, 0xf4, 0xf6 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
+
+#include "vpXXX.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc
@@ -0,0 +1,80 @@
+/* Template file for saturating unary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other
+   saturating unary operators, these can be tested by providing a
+   definition for EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* y=OP(x), then store the result.  */
+#define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));		\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N));			\
+    vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),			\
+		      VECT_VAR(vector_res, T1, W, N));			\
+      CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_UNARY_SAT_OP(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* No need for 64 bits variants.  */
+  DECL_VARIABLE(vector, int, 8, 8);
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 8, 16);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 8, 8);
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 8, 16);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  VLOAD(vector, buffer, , int, s, 8, 8);
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 8, 16);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+
+  /* Apply a saturating unary operator named INSN_NAME.  */
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 8, 8, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 16, 4, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, , int, s, 32, 2, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 8, 16, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 16, 8, expected_cumulative_sat, "");
+  TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat, "");
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx8, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx8, expected, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c
@@ -0,0 +1,20 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+
+#define INSN_NAME vpmax
+#define TEST_MSG "VPMAX"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7,
+				       0xf1, 0xf3, 0xf5, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7,
+					0xf1, 0xf3, 0xf5, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
+
+#include "vpXXX.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c
@@ -0,0 +1,113 @@
+#define INSN_NAME vceq
+#define TEST_MSG "VCEQ/VCEQQ"
+
+/* Extra tests for _p8 variants, which exist only for vceq.  */
+void exec_vceq_p8(void);
+#define EXTRA_TESTS exec_vceq_p8
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff, 0x0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0x0,
+					 0xff, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff,
+					     0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						0xff, 0x0, 0x0, 0x0,
+						0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_p8,uint,8,8) [] = { 0x0, 0x0, 0x0, 0xff,
+					   0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_p8,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+					      0xff, 0x0, 0x0, 0x0,
+					      0x0, 0x0, 0x0, 0x0,
+					      0x0, 0x0, 0x0, 0x0 };
+
+void exec_vceq_p8(void)
+{
+  DECL_VARIABLE(vector, poly, 8, 8);
+  DECL_VARIABLE(vector, poly, 8, 16);
+
+  DECL_VARIABLE(vector2, poly, 8, 8);
+  DECL_VARIABLE(vector2, poly, 8, 16);
+
+  DECL_VARIABLE(vector_res, uint, 8, 8);
+  DECL_VARIABLE(vector_res, uint, 8, 16);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, , poly, p, 8, 8);
+  VLOAD(vector, buffer, q, poly, p, 8, 16);
+
+  VDUP(vector2, , poly, p, 8, 8, 0xF3);
+  VDUP(vector2, q, poly, p, 8, 16, 0xF4);
+
+  TEST_VCOMP(INSN_NAME, , poly, p, uint, 8, 8);
+  TEST_VCOMP(INSN_NAME, q, poly, p, uint, 8, 16);
+
+  CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_p8, "p8");
+  CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_q_p8, "p8");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c
@@ -0,0 +1,66 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x2000, 0x2000 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x4000, 0x4000, 0x4000, 0x4000 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x2000, 0x2000 };
+
+#define TEST_MSG "VMULL_LANE"
+void exec_vmull_lane (void)
+{
+  /* vector_res = vmull_lane(vector,vector2,lane), then store the result.  */
+#define TEST_VMULL_LANE(T1, T2, W, W2, N, L)				\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    vmull##_lane_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			 VECT_VAR(vector2, T1, W, N),			\
+			 L);						\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, uint, 16, 4);
+  DECL_VARIABLE(vector, uint, 32, 2);
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+
+  DECL_VARIABLE(vector_res, int, 32, 4);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  /* Initialize vector.  */
+  VDUP(vector, , int, s, 16, 4, 0x1000);
+  VDUP(vector, , int, s, 32, 2, 0x1000);
+  VDUP(vector, , uint, u, 16, 4, 0x1000);
+  VDUP(vector, , uint, u, 32, 2, 0x1000);
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x4);
+  VDUP(vector2, , int, s, 32, 2, 0x2);
+  VDUP(vector2, , uint, u, 16, 4, 0x4);
+  VDUP(vector2, , uint, u, 32, 2, 0x2);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VMULL_LANE(int, s, 16, 32, 4, 2);
+  TEST_VMULL_LANE(int, s, 32, 64, 2, 1);
+  TEST_VMULL_LANE(uint, u, 16, 32, 4, 2);
+  TEST_VMULL_LANE(uint, u, 32, 64, 2, 1);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  exec_vmull_lane ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c
@@ -0,0 +1,19 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vpadd
+#define TEST_MSG "VPADD"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed,
+				       0xe1, 0xe5, 0xe9, 0xed };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xffffffe1, 0xffffffe1 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed,
+					0xe1, 0xe5, 0xe9, 0xed };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe1, 0xffffffe1 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1f80000, 0xc1f80000 };
+
+#include "vpXXX.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c
@@ -0,0 +1,74 @@
+#define INSN_NAME vneg
+#define TEST_MSG "VNEG/VNEGQ"
+
+/* Extra tests for functions requiring floating-point types.  */
+void exec_vneg_f32(void);
+#define EXTRA_TESTS exec_vneg_f32
+
+#include "unary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x10, 0xf, 0xe, 0xd,
+				       0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x10, 0xf };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9,
+					0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0xf, 0xe, 0xd,
+					0xc, 0xb, 0xa, 0x9 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x33333333, 0x33333333,
+					 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results for float32 variants. Needs to be separated since
+   the generic test function does not test floating-point
+   versions.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc0133333, 0xc0133333 };
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc059999a, 0xc059999a,
+						   0xc059999a, 0xc059999a };
+
+void exec_vneg_f32(void)
+{
+  DECL_VARIABLE(vector, float, 32, 2);
+  DECL_VARIABLE(vector, float, 32, 4);
+
+  DECL_VARIABLE(vector_res, float, 32, 2);
+  DECL_VARIABLE(vector_res, float, 32, 4);
+
+  VDUP(vector, , float, f, 32, 2, 2.3f);
+  VDUP(vector, q, float, f, 32, 4, 3.4f);
+
+  TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2);
+  TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, "");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c
@@ -0,0 +1,76 @@
+#define INSN_NAME vcgt
+#define TEST_MSG "VCGT/VCGTQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+					     0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+						0x0, 0xff, 0xff, 0xff,
+						0xff, 0xff, 0xff, 0xff,
+						0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0xffff };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0xffffffff };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0xffffffff };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0x0, 0x0 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovl.c
@@ -0,0 +1,52 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+					0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
+					0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff0, 0xfffffff1 };
+
+#define TEST_MSG "VMOVL"
+void exec_vmovl (void)
+{
+  /* Basic test: vec128=vmovl(vec64), then store the result.  */
+#define TEST_VMOVL(T1, T2, W, W2, N)					\
+  VECT_VAR(vector128, T1, W2, N) =					\
+    vmovl_##T2##W(VECT_VAR(vector64, T1, W, N));			\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector128, T1, W2, N))
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector128);
+
+  TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64, buffer);
+
+  clean_results ();
+
+  TEST_VMOVL(int, s, 8, 16, 8);
+  TEST_VMOVL(int, s, 16, 32, 4);
+  TEST_VMOVL(int, s, 32, 64, 2);
+  TEST_VMOVL(uint, u, 8, 16, 8);
+  TEST_VMOVL(uint, u, 16, 32, 4);
+  TEST_VMOVL(uint, u, 32, 64, 2);
+
+  CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  exec_vmovl ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c
@@ -0,0 +1,51 @@
+#define INSN_NAME vcagt
+#define TEST_MSG "VCAGT/VCAGTQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+					 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+					  0xffffffff, 0xffffffff };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c
@@ -0,0 +1,123 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+				       0xaa, 0xaa, 0xf0, 0xaa };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					0xaa, 0xaa, 0xaa, 0xf0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					0xaa, 0xaa, 0xaa, 0xf0 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xc1800000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					0xaa, 0xaa, 0xaa, 0xaa,
+					0xaa, 0xaa, 0xaa, 0xaa,
+					0xaa, 0xaa, 0xaa, 0xf0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					0xaaaa, 0xfff0, 0xaaaa, 0xaaaa };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					0xfffffff0, 0xaaaaaaaa };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+					0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					 0xaa, 0xaa, 0xaa, 0xaa,
+					 0xaa, 0xaa, 0xaa, 0xaa,
+					 0xf0, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					 0xfffffff0, 0xaaaaaaaa };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+					 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
+					 0xaa, 0xaa, 0xaa, 0xaa,
+					 0xaa, 0xaa, 0xaa, 0xaa,
+					 0xf0, 0xaa, 0xaa, 0xaa };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
+					 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa,
+					   0xc1800000, 0xaaaaaaaa };
+
+#define TEST_MSG "VLD1_LANE/VLD1_LANEQ"
+void exec_vld1_lane (void)
+{
+  /* Fill vector_src with 0xAA, then load 1 lane.  */
+#define TEST_VLD1_LANE(Q, T1, T2, W, N, L)				\
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, W/8*N);			\
+  VECT_VAR(vector_src, T1, W, N) =					\
+    vld1##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));			\
+  VECT_VAR(vector, T1, W, N) =						\
+    vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N),			\
+			   VECT_VAR(vector_src, T1, W, N), L);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector_src);
+
+  ARRAY(buffer_src, int, 8, 8);
+  ARRAY(buffer_src, int, 16, 4);
+  ARRAY(buffer_src, int, 32, 2);
+  ARRAY(buffer_src, int, 64, 1);
+  ARRAY(buffer_src, uint, 8, 8);
+  ARRAY(buffer_src, uint, 16, 4);
+  ARRAY(buffer_src, uint, 32, 2);
+  ARRAY(buffer_src, uint, 64, 1);
+  ARRAY(buffer_src, poly, 8, 8);
+  ARRAY(buffer_src, poly, 16, 4);
+  ARRAY(buffer_src, float, 32, 2);
+
+  ARRAY(buffer_src, int, 8, 16);
+  ARRAY(buffer_src, int, 16, 8);
+  ARRAY(buffer_src, int, 32, 4);
+  ARRAY(buffer_src, int, 64, 2);
+  ARRAY(buffer_src, uint, 8, 16);
+  ARRAY(buffer_src, uint, 16, 8);
+  ARRAY(buffer_src, uint, 32, 4);
+  ARRAY(buffer_src, uint, 64, 2);
+  ARRAY(buffer_src, poly, 8, 16);
+  ARRAY(buffer_src, poly, 16, 8);
+  ARRAY(buffer_src, float, 32, 4);
+
+  clean_results ();
+
+  /* Choose lane arbitrarily.  */
+  TEST_VLD1_LANE(, int, s, 8, 8, 6);
+  TEST_VLD1_LANE(, int, s, 16, 4, 3);
+  TEST_VLD1_LANE(, int, s, 32, 2, 1);
+  TEST_VLD1_LANE(, int, s, 64, 1, 0);
+  TEST_VLD1_LANE(, uint, u, 8, 8, 7);
+  TEST_VLD1_LANE(, uint, u, 16, 4, 3);
+  TEST_VLD1_LANE(, uint, u, 32, 2, 1);
+  TEST_VLD1_LANE(, uint, u, 64, 1, 0);
+  TEST_VLD1_LANE(, poly, p, 8, 8, 7);
+  TEST_VLD1_LANE(, poly, p, 16, 4, 3);
+  TEST_VLD1_LANE(, float, f, 32, 2, 1);
+
+  TEST_VLD1_LANE(q, int, s, 8, 16, 15);
+  TEST_VLD1_LANE(q, int, s, 16, 8, 5);
+  TEST_VLD1_LANE(q, int, s, 32, 4, 2);
+  TEST_VLD1_LANE(q, int, s, 64, 2, 1);
+  TEST_VLD1_LANE(q, uint, u, 8, 16, 12);
+  TEST_VLD1_LANE(q, uint, u, 16, 8, 6);
+  TEST_VLD1_LANE(q, uint, u, 32, 4, 2);
+  TEST_VLD1_LANE(q, uint, u, 64, 2, 0);
+  TEST_VLD1_LANE(q, poly, p, 8, 16, 12);
+  TEST_VLD1_LANE(q, poly, p, 16, 8, 6);
+  TEST_VLD1_LANE(q, float, f, 32, 4, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vld1_lane ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c
@@ -0,0 +1,50 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+				       0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+					0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+
+#define TEST_MSG "VMOVN"
+void exec_vmovn (void)
+{
+  /* Basic test: vec64=vmovn(vec128), then store the result.  */
+#define TEST_VMOVN(T1, T2, W, W2, N)					\
+  VECT_VAR(vector64, T1, W2, N) =					\
+    vmovn_##T2##W(VECT_VAR(vector128, T1, W, N));			\
+  vst1_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector64, T1, W2, N))
+
+  DECL_VARIABLE_64BITS_VARIANTS(vector64);
+  DECL_VARIABLE_128BITS_VARIANTS(vector128);
+
+  TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer);
+
+  clean_results ();
+
+  TEST_VMOVN(int, s, 16, 8, 8);
+  TEST_VMOVN(int, s, 32, 16, 4);
+  TEST_VMOVN(int, s, 64, 32, 2);
+  TEST_VMOVN(uint, u, 16, 8, 8);
+  TEST_VMOVN(uint, u, 32, 16, 4);
+  TEST_VMOVN(uint, u, 64, 32, 2);
+
+  CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  exec_vmovn ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c
@@ -0,0 +1,124 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+				       0xf6, 0xf6, 0xf6, 0xf6 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffffd };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
+					0xf6, 0xf6, 0xf6, 0xf6,
+					0xf2, 0xf2, 0xf2, 0xf2,
+					0xf6, 0xf6, 0xf6, 0xf6 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
+					0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffffd,
+					0xfffffffffffffffd };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					 0xf7, 0xf7, 0xf7, 0xf7,
+					 0xf3, 0xf3, 0xf3, 0xf3,
+					 0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
+					 0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
+					 0xfffffff2, 0xfffffff2 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffff1,
+					 0xfffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3,
+					 0xf7, 0xf7, 0xf7, 0xf7,
+					 0xf3, 0xf3, 0xf3, 0xf3,
+					 0xf7, 0xf7, 0xf7, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2,
+					 0xfff4, 0xfff4, 0xfff6, 0xfff6 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001,
+					   0xc1600001, 0xc1500001 };
+
+#define TEST_MSG "VBSL/VBSLQ"
+void exec_vbsl (void)
+{
+  /* Basic test: y=vbsl(unsigned_vec,x1,x2), then store the result.  */
+#define TEST_VBSL(T3, Q, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vbsl##Q##_##T2##W(VECT_VAR(vector_first, T3, W, N),			\
+		      VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  DECL_VARIABLE_UNSIGNED_VARIANTS(vector_first);
+
+  clean_results ();
+
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily, will be used for vector
+     comparison. As we want different values for each type variant, we
+     can't use generic initialization macros.  */
+  VDUP(vector2, , int, s, 8, 8, -10);
+  VDUP(vector2, , int, s, 16, 4, -14);
+  VDUP(vector2, , int, s, 32, 2, -30);
+  VDUP(vector2, , int, s, 64, 1, -33);
+  VDUP(vector2, , uint, u, 8, 8, 0xF3);
+  VDUP(vector2, , uint, u, 16, 4, 0xFFF2);
+  VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0);
+  VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3);
+  VDUP(vector2, , float, f, 32, 2, -30.3f);
+  VDUP(vector2, , poly, p, 8, 8, 0xF3);
+  VDUP(vector2, , poly, p, 16, 4, 0xFFF2);
+
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -14);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, -33);
+  VDUP(vector2, q, uint, u, 8, 16, 0xF3);
+  VDUP(vector2, q, uint, u, 16, 8, 0xFFF2);
+  VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF0);
+  VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3);
+  VDUP(vector2, q, poly, p, 8, 16, 0xF3);
+  VDUP(vector2, q, poly, p, 16, 8, 0xFFF2);
+  VDUP(vector2, q, float, f, 32, 4, -30.4f);
+
+  VDUP(vector_first, , uint, u, 8, 8, 0xF4);
+  VDUP(vector_first, , uint, u, 16, 4, 0xFFF6);
+  VDUP(vector_first, , uint, u, 32, 2, 0xFFFFFFF2);
+  VDUP(vector_first, , uint, u, 64, 1, 0xFFFFFFF2);
+  VDUP(vector_first, q, uint, u, 8, 16, 0xF4);
+  VDUP(vector_first, q, uint, u, 16, 8, 0xFFF6);
+  VDUP(vector_first, q, uint, u, 32, 4, 0xFFFFFFF2);
+  VDUP(vector_first, q, uint, u, 64, 2, 0xFFFFFFF2);
+
+  /* Execute the tests.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_VBSL, uint);
+  TEST_VBSL(uint, , poly, p, 8, 8);
+  TEST_VBSL(uint, , poly, p, 16, 4);
+  TEST_VBSL(uint, q, poly, p, 8, 16);
+  TEST_VBSL(uint, q, poly, p, 16, 8);
+  TEST_VBSL(uint, , float, f, 32, 2);
+  TEST_VBSL(uint, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vbsl ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubw.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubw.c
@@ -0,0 +1,50 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vsubw
+#define TEST_MSG "VSUBW"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffd, 0xfffe, 0xffff, 0x0,
+					0x1, 0x2, 0x3, 0x4 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffffe, 0xffffffff, 0x0, 0x1 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfefd, 0xfefe, 0xfeff, 0xff00,
+					 0xff01, 0xff02, 0xff03, 0xff04 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffeffff, 0xffff0000,
+					 0xffff0001, 0xffff0002 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffff00000000,
+					 0xffffffff00000001 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "vXXXw.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op.inc
@@ -0,0 +1,70 @@
+/* Template file for binary operator validation.
+
+   This file is meant to be included by the relevant test files, which
+   have to define the intrinsic family to test. If a given intrinsic
+   supports variants which are not supported by all the other binary
+   operators, these can be tested by providing a definition for
+   EXTRA_TESTS.  */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* Basic test: y=OP(x1,x2), then store the result.  */
+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N)				\
+  TEST_BINARY_OP1(INSN, Q, T1, T2, W, N)				\
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Fill input vector2 with arbitrary values.  */
+  VDUP(vector2, , int, s, 8, 8, 2);
+  VDUP(vector2, , int, s, 16, 4, -4);
+  VDUP(vector2, , int, s, 32, 2, 3);
+  VDUP(vector2, , int, s, 64, 1, 100);
+  VDUP(vector2, , uint, u, 8, 8, 20);
+  VDUP(vector2, , uint, u, 16, 4, 30);
+  VDUP(vector2, , uint, u, 32, 2, 40);
+  VDUP(vector2, , uint, u, 64, 1, 2);
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -20);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, 24);
+  VDUP(vector2, q, uint, u, 8, 16, 12);
+  VDUP(vector2, q, uint, u, 16, 8, 3);
+  VDUP(vector2, q, uint, u, 32, 4, 55);
+  VDUP(vector2, q, uint, u, 64, 2, 3);
+
+  /* Apply a binary operator named INSN_NAME.  */
+  TEST_MACRO_ALL_VARIANTS_1_5(TEST_BINARY_OP, INSN_NAME);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/veor.c
@@ -0,0 +1,47 @@
+#define INSN_NAME veor
+#define TEST_MSG "VEOR/VEORQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf3, 0xf0, 0xf1,
+				       0xf6, 0xf7, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff3, 0xfffffff2 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff94 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe4, 0xe5, 0xe6, 0xe7,
+					0xe0, 0xe1, 0xe2, 0xe3 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffee, 0xffef, 0xffec, 0xffed };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffd8, 0xffffffd9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x6, 0x7, 0x4, 0x5,
+					0x2, 0x3, 0x0, 0x1,
+					0xe, 0xf, 0xc, 0xd,
+					0xa, 0xb, 0x8, 0x9 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c, 0x1d, 0x1e, 0x1f,
+					0x18, 0x19, 0x1a, 0x1b };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x12, 0x13, 0x10, 0x11 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe8,
+					0xffffffffffffffe9 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff,
+					 0xf8, 0xf9, 0xfa, 0xfb,
+					 0xf4, 0xf5, 0xf6, 0xf7,
+					 0xf0, 0xf1, 0xf2, 0xf3 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0,
+					 0xfff7, 0xfff6, 0xfff5, 0xfff4 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffc7, 0xffffffc6,
+					 0xffffffc5, 0xffffffc4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3,
+					 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlal.c
@@ -0,0 +1,18 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmlal
+#define TEST_MSG "VMLAL"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xe907, 0xe908, 0xe909, 0xe90a,
+					0xe90b, 0xe90c, 0xe90d, 0xe90e };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3e07, 0x3e08 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a,
+					 0x3e0b, 0x3e0c, 0x3e0d, 0x3e0e };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x3e07, 0x3e08, 0x3e09, 0x3e0a };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3e07, 0x3e08 };
+
+#include "vmlXl.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/README
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/README
@@ -0,0 +1,132 @@
+This directory contains executable tests for ARM/AArch64 Advanced SIMD
+(Neon) intrinsics.
+
+It is meant to cover execution cases of all the Advanced SIMD
+intrinsics, but does not scan the generated assembler code.
+
+The general framework is composed as follows:
+- advsimd-intrinsics.exp: main dejagnu driver
+- *.c: actual tests, generally one per intrinsinc family
+- arm-neon-ref.h: contains macro definitions to save typing in actual
+  test files
+- compute-ref-data.h: contains input vectors definitions
+- *.inc: generic tests, shared by several families of intrinsics. For
+   instance, unary or binary operators
+
+A typical .c test file starts with the following contents (look at
+vld1.c and vaba.c for sample cases):
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+Then, definitions of expected results, based on common input values,
+as defined in compute-ref-data.h.
+For example:
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+defines the expected results of an operator generating int16x4 values.
+
+The common input values defined in compute-ref-data.h have been chosen
+to avoid corner-case values for most operators, yet exposing negative
+values for signed operators. For this reason, their range is also
+limited. For instance, the initialization of buffer_int16x4 will be
+{ -16, -15, -14, -13 }.
+
+The initialization of floating-point values is done via hex notation,
+to avoid potential rounding problems.
+
+To test special values and corner cases, specific initialization
+values should be used in dedicated tests, to ensure proper coverage.
+An example of this is vshl.
+
+When a variant of an intrinsic is not available, its expected result
+should be defined to the value of CLEAN_PATTERN_8 as defined in
+arm-neon-ref.h. For example:
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+if the given intrinsic has no variant producing an int64x1 result,
+like the vcmp family (eg. vclt).
+
+This is because the helper function (check_results(), defined in
+arm-neon-ref.h), iterates over all the possible variants, to save
+typing in each individual test file. Alternatively, one can directly
+call the CHECK/CHECK_FP macros to check only a few expected results
+(see vabs.c for an example).
+
+Then, define the TEST_MSG string, which will be used when reporting errors.
+
+Next, define the function performing the actual tests, in general
+relying on the helpers provided by arm-neon-ref.h, which means:
+
+* declare necessary vectors of suitable types: using
+  DECL_VARIABLE_ALL_VARIANTS when all variants are supported, or the
+  relevant of subset calls to DECL_VARIABLE.
+
+* call clean_results() to initialize the 'results' buffers.
+
+* initialize the input vectors, using VLOAD, VDUP or VSET_LANE (vld*
+  tests do not need this step, since their actual purpose is to
+  initialize vectors).
+
+* execute the intrinsic on relevant variants, for instance using
+  TEST_MACRO_ALL_VARIANTS_2_5.
+
+* call check_results() to check that the results match the expected
+  values.
+
+A template test file could be:
+=================================================================
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+				       0xfa, 0xfb, 0xfc, 0xfd };
+/* and as many others as necessary.  */
+
+#define TEST_MSG "VMYINTRINSIC"
+void exec_myintrinsic (void)
+{
+  /* my test: v4=vmyintrinsic(v1,v2,v3), then store the result.  */
+#define TEST_VMYINTR(Q, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vmyintr##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+			 VECT_VAR(vector2, T1, W, N),			\
+			 VECT_VAR(vector3, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VMYINTR_VAR(VAR)			\
+  DECL_VARIABLE(VAR, int, 8, 8);
+/* And as many others as necessary.  */
+
+  DECL_VMYINTR_VAR(vector1);
+  DECL_VMYINTR_VAR(vector2);
+  DECL_VMYINTR_VAR(vector3);
+  DECL_VMYINTR_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+/* And as many others as necessary.  */
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+/* And as many others as necessary.  */
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector3, , int, s, 8, 8, -5);
+/* And as many others as necessary.  */
+
+  /* Execute the tests.  */
+  TEST_VMYINTR(, int, s, 8, 8);
+/* And as many others as necessary.  */
+
+  check_results (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vmyintrinsic ();
+  return 0;
+}
+=================================================================
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c
@@ -0,0 +1,153 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+#include <math.h>
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x11, 0x10, 0xf, 0xe,
+				       0xd, 0xc, 0xb, 0xa };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3, 0x2, 0x1, 0x0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x18, 0x17 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xef, 0xf0, 0xf1, 0xf2,
+					0xf3, 0xf4, 0xf5, 0xf6 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe8, 0xffffffe9 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x41c26666, 0x41ba6666 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x1a, 0x19, 0x18, 0x17,
+					0x16, 0x15, 0x14, 0x13,
+					0x12, 0x11, 0x10, 0xf,
+					0xe, 0xd, 0xc, 0xb };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x4, 0x3, 0x2, 0x1,
+					0x0, 0x1, 0x2, 0x3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x30, 0x2f, 0x2e, 0x2d };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xe6, 0xe7, 0xe8, 0xe9,
+					 0xea, 0xeb, 0xec, 0xed,
+					 0xee, 0xef, 0xf0, 0xf1,
+					 0xf2, 0xf3, 0xf4, 0xf5 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffe4, 0xffe5, 0xffe6, 0xffe7,
+					 0xffe8, 0xffe9, 0xffea, 0xffeb };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1,
+					 0xffffffd2, 0xffffffd3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1,
+					   0x42387ae1, 0x42347ae1 };
+
+/* Additional expected results for float32 variants with specially
+   chosen input values.  */
+VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+#define TEST_MSG "VABD/VABDQ"
+void exec_vabd (void)
+{
+  /* Basic test: v4=vabd(v1,v2), then store the result.  */
+#define TEST_VABD(Q, T1, T2, W, N)					\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vabd##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		      VECT_VAR(vector2, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VABD_VAR(VAR)			\
+  DECL_VARIABLE(VAR, int, 8, 8);		\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 8, 8);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, float, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 8, 16);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 8, 16);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4);		\
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+  DECL_VABD_VAR(vector1);
+  DECL_VABD_VAR(vector2);
+  DECL_VABD_VAR(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+  VDUP(vector2, , float, f, 32, 2, 8.3f);
+  VDUP(vector2, q, int, s, 8, 16, 10);
+  VDUP(vector2, q, int, s, 16, 8, -12);
+  VDUP(vector2, q, int, s, 32, 4, 32);
+  VDUP(vector2, q, uint, u, 8, 16, 10);
+  VDUP(vector2, q, uint, u, 16, 8, 12);
+  VDUP(vector2, q, uint, u, 32, 4, 32);
+  VDUP(vector2, q, float, f, 32, 4, 32.12f);
+
+  /* Execute the tests.  */
+  TEST_VABD(, int, s, 8, 8);
+  TEST_VABD(, int, s, 16, 4);
+  TEST_VABD(, int, s, 32, 2);
+  TEST_VABD(, uint, u, 8, 8);
+  TEST_VABD(, uint, u, 16, 4);
+  TEST_VABD(, uint, u, 32, 2);
+  TEST_VABD(, float, f, 32, 2);
+  TEST_VABD(q, int, s, 8, 16);
+  TEST_VABD(q, int, s, 16, 8);
+  TEST_VABD(q, int, s, 32, 4);
+  TEST_VABD(q, uint, u, 8, 16);
+  TEST_VABD(q, uint, u, 16, 8);
+  TEST_VABD(q, uint, u, 32, 4);
+  TEST_VABD(q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+
+  /* Extra FP tests with special values (-0.0, ....) */
+  VDUP(vector1, q, float, f, 32, 4, -0.0f);
+  VDUP(vector2, q, float, f, 32, 4, 0.0);
+  TEST_VABD(q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)");
+
+  /* Extra FP tests with special values (-0.0, ....) */
+  VDUP(vector1, q, float, f, 32, 4, 0.0f);
+  VDUP(vector2, q, float, f, 32, 4, -0.0);
+  TEST_VABD(q, float, f, 32, 4);
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)");
+}
+
+int main (void)
+{
+  exec_vabd ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla_n.c
@@ -0,0 +1,23 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmla
+#define TEST_MSG "VMLA_N"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x595, 0x596, 0x597, 0x598 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xb3a, 0xb3b };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10df, 0x10e0, 0x10e1, 0x10e2 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x1684, 0x1685 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4497deb8, 0x4497feb8 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x1c29, 0x1c2a, 0x1c2b, 0x1c2c,
+					0x1c2d, 0x1c2e, 0x1c2f, 0x1c30 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x21ce, 0x21cf, 0x21d0, 0x21d1 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x2773, 0x2774, 0x2775, 0x2776,
+					 0x2777, 0x2778, 0x2779, 0x277a };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x2d18, 0x2d19, 0x2d1a, 0x2d1b };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x4568087b, 0x4568187b,
+					   0x4568287b, 0x4568387b };
+
+#include "vmlX_n.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c
@@ -0,0 +1,104 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffface0, 0xffffb212 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b66666, 0xc3ab0000 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc,
+					0xffd0, 0xffd4, 0xffd8, 0xffdc };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffde0, 0xfffffe02,
+					0xfffffe24, 0xfffffe46 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c,
+					 0xccd0, 0xd114, 0xd558, 0xd99c };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffface0, 0xffffb212,
+					 0xffffb744, 0xffffbc76 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b66666, 0xc3ab0000,
+					   0xc39f9999, 0xc3943333 };
+
+#define TEST_MSG "VMUL_LANE"
+void exec_vmul_lane (void)
+{
+#define DECL_VMUL(VAR)				\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2);		\
+  DECL_VARIABLE(VAR, float, 32, 2);		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4);		\
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+  /* vector_res = vmul_lane(vector,vector2,lane), then store the result.  */
+#define TEST_VMUL_LANE(Q, T1, T2, W, N, N2, L)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    vmul##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			   VECT_VAR(vector2, T1, W, N2),		\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),				\
+		    VECT_VAR(vector_res, T1, W, N))
+
+  DECL_VMUL(vector);
+  DECL_VMUL(vector_res);
+
+  DECL_VARIABLE(vector2, int, 16, 4);
+  DECL_VARIABLE(vector2, int, 32, 2);
+  DECL_VARIABLE(vector2, uint, 16, 4);
+  DECL_VARIABLE(vector2, uint, 32, 2);
+  DECL_VARIABLE(vector2, float, 32, 2);
+
+  clean_results ();
+
+  /* Initialize vector from pre-initialized values.  */
+  VLOAD(vector, buffer, , int, s, 16, 4);
+  VLOAD(vector, buffer, , int, s, 32, 2);
+  VLOAD(vector, buffer, , uint, u, 16, 4);
+  VLOAD(vector, buffer, , uint, u, 32, 2);
+  VLOAD(vector, buffer, , float, f, 32, 2);
+  VLOAD(vector, buffer, q, int, s, 16, 8);
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 16, 8);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, float, f, 32, 4);
+
+  /* Initialize vector2.  */
+  VDUP(vector2, , int, s, 16, 4, 0x4);
+  VDUP(vector2, , int, s, 32, 2, 0x22);
+  VDUP(vector2, , uint, u, 16, 4, 0x444);
+  VDUP(vector2, , uint, u, 32, 2, 0x532);
+  VDUP(vector2, , float, f, 32, 2, 22.8f);
+
+  /* Choose lane arbitrarily.  */
+  TEST_VMUL_LANE(, int, s, 16, 4, 4, 2);
+  TEST_VMUL_LANE(, int, s, 32, 2, 2, 1);
+  TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2);
+  TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1);
+  TEST_VMUL_LANE(, float, f, 32, 2, 2, 1);
+  TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2);
+  TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0);
+  TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2);
+  TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1);
+  TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0);
+
+  CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, "");
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, "");
+}
+
+int main (void)
+{
+  exec_vmul_lane ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c
@@ -0,0 +1,29 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vqdmlsl
+#define TEST_MSG "VQDMLSL"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,64,2) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffff83c2, 0xffff83c3,
+					0xffff83c4, 0xffff83c5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffff83c2,
+					0xffffffffffff83c3 };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,64,2) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x80000000, 0x80000000,
+					 0x80000000, 0x80000000 };
+VECT_VAR_DECL(expected2,int,64,2) [] = { 0x8000000000000000,
+					 0x8000000000000000 };
+
+#include "vqdmlXl.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsubhn.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsubhn.c
@@ -0,0 +1,24 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+#define INSN_NAME vrsubhn
+#define TEST_MSG "VRSUBHN"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x31, 0x31, 0x31, 0x31,
+				       0x31, 0x31, 0x31, 0x31 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x31, 0x31, 0x31, 0x31 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x17, 0x17 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x2, 0x2, 0x2, 0x2,
+					0x2, 0x2, 0x2, 0x2 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x36, 0x36, 0x36, 0x36 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x2, 0x2 };
+
+#include "vXXXhn.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc
@@ -0,0 +1,70 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vmlxl_lane(vector, vector3, vector4, lane),
+     then store the result.  */
+#define TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V)			\
+  VECT_VAR(vector_res, T1, W, N) =                                      \
+    INSN##_##T2##W2(VECT_VAR(vector, T1, W, N),                         \
+                    VECT_VAR(vector3, T1, W2, N),                       \
+                    VECT_VAR(vector4, T1, W2, N),                       \
+                    V);                                                 \
+  vst1q_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLXL_LANE(INSN, T1, T2, W, W2, N, V)			\
+  TEST_VMLXL_LANE1(INSN, T1, T2, W, W2, N, V)
+
+  DECL_VARIABLE(vector, int, 32, 4);
+  DECL_VARIABLE(vector3, int, 16, 4);
+  DECL_VARIABLE(vector4, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  DECL_VARIABLE(vector, int, 64, 2);
+  DECL_VARIABLE(vector3, int, 32, 2);
+  DECL_VARIABLE(vector4, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 64, 2);
+
+  DECL_VARIABLE(vector, uint, 32, 4);
+  DECL_VARIABLE(vector3, uint, 16, 4);
+  DECL_VARIABLE(vector4, uint, 16, 4);
+  DECL_VARIABLE(vector_res, uint, 32, 4);
+
+  DECL_VARIABLE(vector, uint, 64, 2);
+  DECL_VARIABLE(vector3, uint, 32, 2);
+  DECL_VARIABLE(vector4, uint, 32, 2);
+  DECL_VARIABLE(vector_res, uint, 64, 2);
+
+  clean_results ();
+
+  VLOAD(vector, buffer, q, int, s, 32, 4);
+  VLOAD(vector, buffer, q, int, s, 64, 2);
+  VLOAD(vector, buffer, q, uint, u, 32, 4);
+  VLOAD(vector, buffer, q, uint, u, 64, 2);
+
+  VDUP(vector3, , int, s, 16, 4, 0x55);
+  VDUP(vector4, , int, s, 16, 4, 0xBB);
+  VDUP(vector3, , int, s, 32, 2, 0x55);
+  VDUP(vector4, , int, s, 32, 2, 0xBB);
+  VDUP(vector3, , uint, u, 16, 4, 0x55);
+  VDUP(vector4, , uint, u, 16, 4, 0xBB);
+  VDUP(vector3, , uint, u, 32, 2, 0x55);
+  VDUP(vector4, , uint, u, 32, 2, 0xBB);
+
+  TEST_VMLXL_LANE(INSN_NAME, int, s, 32, 16, 4, 2);
+  TEST_VMLXL_LANE(INSN_NAME, int, s, 64, 32, 2, 1);
+  TEST_VMLXL_LANE(INSN_NAME, uint, u, 32, 16, 4, 2);
+  TEST_VMLXL_LANE(INSN_NAME, uint, u, 64, 32, 2, 1);
+
+  CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, "");
+  CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, "");
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdl.c
@@ -0,0 +1,109 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x11, 0x10, 0xf, 0xe,
+					0xd, 0xc, 0xb, 0xa };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x3, 0x2, 0x1, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x18, 0x17 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xef, 0xf0, 0xf1, 0xf2,
+					 0xf3, 0xf4, 0xf5, 0xf6 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffe3, 0xffe4, 0xffe5, 0xffe6 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffe8,
+					 0xffffffe9 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#define TEST_MSG "VABDL"
+void exec_vabdl (void)
+{
+  /* Basic test: v4=vabdl(v1,v2), then store the result.  */
+#define TEST_VABDL(T1, T2, W, W2, N)					\
+  VECT_VAR(vector_res, T1, W2, N) =					\
+    vabdl_##T2##W(VECT_VAR(vector1, T1, W, N),				\
+		  VECT_VAR(vector2, T1, W, N));				\
+  vst1q_##T2##W2(VECT_VAR(result, T1, W2, N), VECT_VAR(vector_res, T1, W2, N))
+
+#define DECL_VABDL_VAR_LONG(VAR)		\
+  DECL_VARIABLE(VAR, int, 16, 8);		\
+  DECL_VARIABLE(VAR, int, 32, 4);		\
+  DECL_VARIABLE(VAR, int, 64, 2);		\
+  DECL_VARIABLE(VAR, uint, 16, 8);		\
+  DECL_VARIABLE(VAR, uint, 32, 4);		\
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+#define DECL_VABDL_VAR_SHORT(VAR)		\
+  DECL_VARIABLE(VAR, int, 8, 8);		\
+  DECL_VARIABLE(VAR, int, 16, 4);		\
+  DECL_VARIABLE(VAR, int, 32, 2);		\
+  DECL_VARIABLE(VAR, uint, 8, 8);		\
+  DECL_VARIABLE(VAR, uint, 16, 4);		\
+  DECL_VARIABLE(VAR, uint, 32, 2)
+
+  DECL_VABDL_VAR_SHORT(vector1);
+  DECL_VABDL_VAR_SHORT(vector2);
+  DECL_VABDL_VAR_LONG(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector1" from "buffer".  */
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+
+  /* Choose init value arbitrarily.  */
+  VDUP(vector2, , int, s, 8, 8, 1);
+  VDUP(vector2, , int, s, 16, 4, -13);
+  VDUP(vector2, , int, s, 32, 2, 8);
+  VDUP(vector2, , uint, u, 8, 8, 1);
+  VDUP(vector2, , uint, u, 16, 4, 13);
+  VDUP(vector2, , uint, u, 32, 2, 8);
+
+  /* Execute the tests.  */
+  TEST_VABDL(int, s, 8, 16, 8);
+  TEST_VABDL(int, s, 16, 32, 4);
+  TEST_VABDL(int, s, 32, 64, 2);
+  TEST_VABDL(uint, u, 8, 16, 8);
+  TEST_VABDL(uint, u, 16, 32, 4);
+  TEST_VABDL(uint, u, 32, 64, 2);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  exec_vabdl ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vand.c
@@ -0,0 +1,45 @@
+#define INSN_NAME vand
+#define TEST_MSG "VAND/VANDQ"
+
+#include "binary_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x0, 0x0, 0x2, 0x2,
+				       0x0, 0x0, 0x2, 0x2 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x60 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x10, 0x10, 0x10, 0x10,
+					0x14, 0x14, 0x14, 0x14 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x10, 0x10, 0x12, 0x12 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x20, 0x20 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf0, 0xf2, 0xf2,
+					0xf4, 0xf4, 0xf6, 0xf6,
+					0xf0, 0xf0, 0xf2, 0xf2,
+					0xf4, 0xf4, 0xf6, 0xf6 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xffe0, 0xffe0, 0xffe0, 0xffe0,
+					0xffe4, 0xffe4, 0xffe4, 0xffe4 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe0, 0xffffffe0,
+					0xffffffe2, 0xffffffe2 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x10, 0x10 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+					 0x4, 0x4, 0x4, 0x4,
+					 0x8, 0x8, 0x8, 0x8,
+					 0xc, 0xc, 0xc, 0xc };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1, 0x2, 0x3,
+					 0x0, 0x1, 0x2, 0x3 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x30, 0x31, 0x32, 0x33 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c
@@ -0,0 +1,80 @@
+#define INSN_NAME vcle
+#define TEST_MSG "VCLE/VCLEQ"
+
+#include "cmp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0x0 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x33333333, 0x33333333,
+					0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0xff, 0xff, 0xff,
+					 0xff, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+					 0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+					 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected_uint,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
+					     0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,16,4) [] = { 0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_uint,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+
+VECT_VAR_DECL(expected_q_uint,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
+						0xff, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
+						0xffff, 0xffff, 0xffff, 0x0 };
+VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+						0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff,
+						 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 };
+VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 };
+
+VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_inf,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected_minf,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_inf2,uint,32,2) [] = { 0x0, 0x0 };
+
+VECT_VAR_DECL(expected_mzero,uint,32,2) [] = { 0xffffffff, 0xffffffff };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddl.c
@@ -0,0 +1,51 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vaddl
+#define TEST_MSG "VADDL"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = {  0xffe3, 0xffe4, 0xffe5, 0xffe6,
+					 0xffe7, 0xffe8, 0xffe9, 0xffea };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffe2, 0xffffffe3,
+					0xffffffe4, 0xffffffe5 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xffffffffffffffe0,
+					0xffffffffffffffe1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x1e3, 0x1e4, 0x1e5, 0x1e6,
+					 0x1e7, 0x1e8, 0x1e9, 0x1ea };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1ffe1, 0x1ffe2,
+					 0x1ffe3, 0x1ffe4 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x1ffffffe0, 0x1ffffffe1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+#include "vXXXl.inc"
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c
@@ -0,0 +1,49 @@
+#define INSN_NAME vcale
+#define TEST_MSG "VCALE/VCALEQ"
+
+#include "cmp_fp_op.inc"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+				       0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33,
+					0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0xffffffff, 0x0, 0x0 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+					0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x333, 0x3333, 0x3333, 0x3333,
+					 0x333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+					 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33,
+					 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+					 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c
@@ -0,0 +1,162 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vsli
+#define TEST_MSG "VSLI_N"
+
+/* Extra tests for functions requiring corner cases tests.  */
+void vsli_extra(void);
+#define EXTRA_TESTS vsli_extra
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0x20, 0x21, 0x22, 0x23,
+				       0x24, 0x25, 0x26, 0x27 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xffe0, 0xffe1, 0xffe2, 0xffe3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x6, 0x7 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x64fffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x50, 0x51, 0x52, 0x53,
+					0x50, 0x51, 0x52, 0x53 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0x3ffffff0, 0x3ffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x10 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x50, 0x51, 0x52, 0x53,
+					0x50, 0x51, 0x52, 0x53 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x7bf0, 0x7bf1, 0x7bf2, 0x7bf3 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xd0, 0xd1, 0xd2, 0xd3,
+					0xd4, 0xd5, 0xd6, 0xd7,
+					0xd8, 0xd9, 0xda, 0xdb,
+					0xdc, 0xdd, 0xde, 0xdf };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xff60, 0xff61, 0xff62, 0xff63,
+					0xff64, 0xff65, 0xff66, 0xff67 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfe2ffff0, 0xfe2ffff1,
+					0xfe2ffff2, 0xfe2ffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x18fff0, 0x18fff1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x60, 0x61, 0x62, 0x63,
+					 0x64, 0x65, 0x66, 0x67,
+					 0x60, 0x61, 0x62, 0x63,
+					 0x64, 0x65, 0x66, 0x67 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3,
+					 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x1bfffff0, 0x1bfffff1,
+					 0x1bfffff2, 0x1bfffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x7ffffffffffff0, 0x7ffffffffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0x61, 0x62, 0x63,
+					 0x64, 0x65, 0x66, 0x67,
+					 0x60, 0x61, 0x62, 0x63,
+					 0x64, 0x65, 0x66, 0x67 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3ff0, 0x3ff1, 0x3ff2, 0x3ff3,
+					 0x3ff4, 0x3ff5, 0x3ff6, 0x3ff7 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+					   0x33333333, 0x33333333 };
+
+/* Expected results with max shift amount.  */
+VECT_VAR_DECL(expected_max_shift,int,8,8) [] = { 0x70, 0x71, 0x72, 0x73,
+						 0x74, 0x75, 0x76, 0x77 };
+VECT_VAR_DECL(expected_max_shift,int,16,4) [] = { 0x7ff0, 0x7ff1,
+						  0x7ff2, 0x7ff3 };
+VECT_VAR_DECL(expected_max_shift,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected_max_shift,int,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL(expected_max_shift,uint,8,8) [] = { 0x70, 0x71, 0x72, 0x73,
+						  0x74, 0x75, 0x76, 0x77 };
+VECT_VAR_DECL(expected_max_shift,uint,16,4) [] = { 0x7ff0, 0x7ff1,
+						   0x7ff2, 0x7ff3 };
+VECT_VAR_DECL(expected_max_shift,uint,32,2) [] = { 0x7ffffff0, 0x7ffffff1 };
+VECT_VAR_DECL(expected_max_shift,uint,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL(expected_max_shift,poly,8,8) [] = { 0x70, 0x71, 0x72, 0x73,
+						  0x74, 0x75, 0x76, 0x77 };
+VECT_VAR_DECL(expected_max_shift,poly,16,4) [] = { 0x7ff0, 0x7ff1,
+						   0x7ff2, 0x7ff3 };
+VECT_VAR_DECL(expected_max_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_max_shift,int,8,16) [] = { 0x70, 0x71, 0x72, 0x73,
+						  0x74, 0x75, 0x76, 0x77,
+						  0x78, 0x79, 0x7a, 0x7b,
+						  0x7c, 0x7d, 0x7e, 0x7f };
+VECT_VAR_DECL(expected_max_shift,int,16,8) [] = { 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3,
+						  0x7ff4, 0x7ff5, 0x7ff6, 0x7ff7 };
+VECT_VAR_DECL(expected_max_shift,int,32,4) [] = { 0x7ffffff0, 0x7ffffff1,
+						  0x7ffffff2, 0x7ffffff3 };
+VECT_VAR_DECL(expected_max_shift,int,64,2) [] = { 0x7ffffffffffffff0,
+						  0x7ffffffffffffff1 };
+VECT_VAR_DECL(expected_max_shift,uint,8,16) [] = { 0x70, 0x71, 0x72, 0x73,
+						   0x74, 0x75, 0x76, 0x77,
+						   0x78, 0x79, 0x7a, 0x7b,
+						   0x7c, 0x7d, 0x7e, 0x7f };
+VECT_VAR_DECL(expected_max_shift,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						   0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_max_shift,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+						   0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected_max_shift,uint,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_max_shift,poly,8,16) [] = { 0x70, 0x71, 0x72, 0x73,
+						   0x74, 0x75, 0x76, 0x77,
+						   0x78, 0x79, 0x7a, 0x7b,
+						   0x7c, 0x7d, 0x7e, 0x7f };
+VECT_VAR_DECL(expected_max_shift,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+						   0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected_max_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+						     0x33333333, 0x33333333 };
+
+#include "vsXi_n.inc"
+
+void vsli_extra(void)
+{
+  /* Test cases with maximum shift amount (this amount is different
+     from vsri).  */
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Fill input vector2 with arbitrary values.  */
+  VDUP(vector2, , int, s, 8, 8, 2);
+  VDUP(vector2, , int, s, 16, 4, -4);
+  VDUP(vector2, , int, s, 32, 2, 3);
+  VDUP(vector2, , int, s, 64, 1, 100);
+  VDUP(vector2, , uint, u, 8, 8, 20);
+  VDUP(vector2, , uint, u, 16, 4, 30);
+  VDUP(vector2, , uint, u, 32, 2, 40);
+  VDUP(vector2, , uint, u, 64, 1, 2);
+  VDUP(vector2, , poly, p, 8, 8, 20);
+  VDUP(vector2, , poly, p, 16, 4, 30);
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -20);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, 24);
+  VDUP(vector2, q, uint, u, 8, 16, 12);
+  VDUP(vector2, q, uint, u, 16, 8, 3);
+  VDUP(vector2, q, uint, u, 32, 4, 55);
+  VDUP(vector2, q, uint, u, 64, 2, 3);
+  VDUP(vector2, q, poly, p, 8, 16, 12);
+  VDUP(vector2, q, poly, p, 16, 8, 3);
+
+  /* Use maximum allowed shift amount.  */
+  TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 7);
+  TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 15);
+  TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 31);
+  TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 63);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 7);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 15);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 31);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 63);
+  TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 7);
+  TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 15);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 7);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 15);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 31);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 63);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 7);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 15);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 31);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 63);
+  TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 7);
+  TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 15);
+
+  CHECK_RESULTS_NAMED (TEST_MSG, expected_max_shift, "(max shift amount)");
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -0,0 +1,565 @@
+/* This file defines helper operations shared by all the tests.  */
+
+#ifndef _ARM_NEON_REF_H_
+#define _ARM_NEON_REF_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+/* helper type, to help write floating point results in integer form.  */
+typedef uint32_t hfloat32_t;
+
+extern void abort(void);
+extern void *memset(void *, int, size_t);
+extern void *memcpy(void *, const void *, size_t);
+extern size_t strlen(const char *);
+
+/* Various string construction helpers.  */
+
+/*
+  The most useful at user-level are VECT_VAR and VECT_VAR_DECL, which
+   construct variable names or declarations, such as:
+   VECT_VAR(expected, int, 16, 4) -> expected_int16x4
+   VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
+*/
+
+#define xSTR(X) #X
+#define STR(X) xSTR(X)
+
+#define xNAME1(V,T) V ## _ ##  T
+#define xNAME(V,T) xNAME1(V,T)
+
+/* VAR(foo,int,16) -> foo_int16 */
+#define VAR(V,T,W) xNAME(V,T##W)
+/* VAR_DECL(foo,int,16) -> int16_t foo_int16 */
+#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
+
+/* VECT_NAME(int,16,4) ->  int16x4 */
+#define VECT_NAME(T, W, N) T##W##x##N
+/* VECT_ARRAY_NAME(int,16,4,2) -> int16x4x2 */
+#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
+/* VECT_TYPE(int,16,4) -> int16x4_t */
+#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
+/* VECT_ARRAY_TYPE(int,16,4,2) -> int16x4x2_t */
+#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
+
+/* VECT_VAR(foo,int,16,4) -> foo_int16x4 */
+#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
+/* VECT_VAR_DECL(foo,int,16,4) -> int16_t foo_int16x4 */
+#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
+
+/* Array declarations.  */
+/* ARRAY(foo,int,16,4) -> int16_t foo_int16x4[4] */
+#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
+
+/* Arrays of vectors.  */
+/* VECT_ARRAY_VAR(foo,int,16,4,2) -> foo_int16x4x2 */
+#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
+/* VECT_ARRAY(foo,int,16,4,2) -> int16_t foo_int16x4x2[4*2] */
+#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
+
+/* Check results vs expected values. Operates on one vector.  */
+#define CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)				\
+  {									\
+    int i;								\
+    for(i=0; i<N ; i++)							\
+      {									\
+	if (VECT_VAR(result, T, W, N)[i] !=				\
+	    VECT_VAR(EXPECTED, T, W, N)[i]) {				\
+	  fprintf(stderr,						\
+		  "ERROR in %s (%s line %d in buffer '%s') at type %s "	\
+		  "index %d: got 0x%" FMT " != 0x%" FMT " %s\n",	\
+		  MSG, __FILE__, __LINE__,				\
+		  STR(EXPECTED),					\
+		  STR(VECT_NAME(T, W, N)),				\
+		  i,							\
+		  VECT_VAR(result, T, W, N)[i],				\
+		  VECT_VAR(EXPECTED, T, W, N)[i],			\
+		  strlen(COMMENT) > 0 ? COMMENT : "");			\
+	  abort();							\
+	}								\
+      }									\
+    fprintf(stderr, "CHECKED %s\n", MSG);				\
+  }
+
+/* Floating-point variant.  */
+#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
+  {									\
+    int i;								\
+    for(i=0; i<N ; i++)							\
+      {									\
+	union fp_operand {						\
+	  uint##W##_t i;						\
+	  float##W##_t f;						\
+	} tmp_res, tmp_exp;						\
+	tmp_res.f = VECT_VAR(result, T, W, N)[i];			\
+	tmp_exp.i = VECT_VAR(EXPECTED, h##T, W, N)[i];			\
+	if (tmp_res.i != tmp_exp.i) {					\
+	  fprintf(stderr,						\
+		  "ERROR in %s (%s line %d in buffer '%s') at type %s "	\
+		  "index %d: got 0x%" FMT " != 0x%" FMT " %s\n",	\
+		  MSG, __FILE__, __LINE__,				\
+		  STR(EXPECTED),					\
+		  STR(VECT_NAME(T, W, N)),				\
+		  i,							\
+		  tmp_res.i,						\
+		  tmp_exp.i,						\
+		  strlen(COMMENT) > 0 ? COMMENT : "");			\
+	  abort();							\
+	}								\
+      }									\
+    fprintf(stderr, "CHECKED %s\n", MSG);				\
+  }
+
+/* Clean buffer with a non-zero pattern to help diagnose buffer
+   overflows.  */
+#define CLEAN_PATTERN_8  0x33
+
+#define CLEAN(VAR,T,W,N)						\
+  memset(VECT_VAR(VAR, T, W, N),					\
+	 CLEAN_PATTERN_8,						\
+	 sizeof(VECT_VAR(VAR, T, W, N)));
+
+/* Define output buffers, one of each size.  */
+static ARRAY(result, int, 8, 8);
+static ARRAY(result, int, 16, 4);
+static ARRAY(result, int, 32, 2);
+static ARRAY(result, int, 64, 1);
+static ARRAY(result, uint, 8, 8);
+static ARRAY(result, uint, 16, 4);
+static ARRAY(result, uint, 32, 2);
+static ARRAY(result, uint, 64, 1);
+static ARRAY(result, poly, 8, 8);
+static ARRAY(result, poly, 16, 4);
+static ARRAY(result, float, 32, 2);
+static ARRAY(result, int, 8, 16);
+static ARRAY(result, int, 16, 8);
+static ARRAY(result, int, 32, 4);
+static ARRAY(result, int, 64, 2);
+static ARRAY(result, uint, 8, 16);
+static ARRAY(result, uint, 16, 8);
+static ARRAY(result, uint, 32, 4);
+static ARRAY(result, uint, 64, 2);
+static ARRAY(result, poly, 8, 16);
+static ARRAY(result, poly, 16, 8);
+static ARRAY(result, float, 32, 4);
+
+/* Declare expected results, one of each size. They are defined and
+   initialized in each test file.  */
+extern ARRAY(expected, int, 8, 8);
+extern ARRAY(expected, int, 16, 4);
+extern ARRAY(expected, int, 32, 2);
+extern ARRAY(expected, int, 64, 1);
+extern ARRAY(expected, uint, 8, 8);
+extern ARRAY(expected, uint, 16, 4);
+extern ARRAY(expected, uint, 32, 2);
+extern ARRAY(expected, uint, 64, 1);
+extern ARRAY(expected, poly, 8, 8);
+extern ARRAY(expected, poly, 16, 4);
+extern ARRAY(expected, hfloat, 32, 2);
+extern ARRAY(expected, int, 8, 16);
+extern ARRAY(expected, int, 16, 8);
+extern ARRAY(expected, int, 32, 4);
+extern ARRAY(expected, int, 64, 2);
+extern ARRAY(expected, uint, 8, 16);
+extern ARRAY(expected, uint, 16, 8);
+extern ARRAY(expected, uint, 32, 4);
+extern ARRAY(expected, uint, 64, 2);
+extern ARRAY(expected, poly, 8, 16);
+extern ARRAY(expected, poly, 16, 8);
+extern ARRAY(expected, hfloat, 32, 4);
+
+/* Check results. Operates on all possible vector types.  */
+#define CHECK_RESULTS(test_name,comment)				\
+  {									\
+    CHECK(test_name, int, 8, 8, PRIx8, expected, comment);		\
+    CHECK(test_name, int, 16, 4, PRIx16, expected, comment);		\
+    CHECK(test_name, int, 32, 2, PRIx32, expected, comment);		\
+    CHECK(test_name, int, 64, 1, PRIx64, expected, comment);		\
+    CHECK(test_name, uint, 8, 8, PRIx8, expected, comment);		\
+    CHECK(test_name, uint, 16, 4, PRIx16, expected, comment);		\
+    CHECK(test_name, uint, 32, 2, PRIx32, expected, comment);		\
+    CHECK(test_name, uint, 64, 1, PRIx64, expected, comment);		\
+    CHECK(test_name, poly, 8, 8, PRIx8, expected, comment);		\
+    CHECK(test_name, poly, 16, 4, PRIx16, expected, comment);		\
+    CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment);	\
+									\
+    CHECK(test_name, int, 8, 16, PRIx8, expected, comment);		\
+    CHECK(test_name, int, 16, 8, PRIx16, expected, comment);		\
+    CHECK(test_name, int, 32, 4, PRIx32, expected, comment);		\
+    CHECK(test_name, int, 64, 2, PRIx64, expected, comment);		\
+    CHECK(test_name, uint, 8, 16, PRIx8, expected, comment);		\
+    CHECK(test_name, uint, 16, 8, PRIx16, expected, comment);		\
+    CHECK(test_name, uint, 32, 4, PRIx32, expected, comment);		\
+    CHECK(test_name, uint, 64, 2, PRIx64, expected, comment);		\
+    CHECK(test_name, poly, 8, 16, PRIx8, expected, comment);		\
+    CHECK(test_name, poly, 16, 8, PRIx16, expected, comment);		\
+    CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment);	\
+  }									\
+
+#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment)			\
+  {									\
+    CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
+    CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
+    CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);		\
+    CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);		\
+    CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);		\
+    CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);		\
+    CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);		\
+    CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
+    CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);		\
+    CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);		\
+    CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
+									\
+    CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
+    CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);		\
+    CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);		\
+    CHECK(test_name, int, 64, 2, PRIx64, EXPECTED, comment);		\
+    CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);		\
+    CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);		\
+    CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);		\
+    CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment);		\
+    CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);		\
+    CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);		\
+    CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);	\
+  }									\
+
+
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+typedef union {
+  struct {
+    int _xxx:25;
+    unsigned int DN:1;
+    unsigned int AHP:1;
+    unsigned int QC:1;
+    int V:1;
+    int C:1;
+    int Z:1;
+    int N:1;
+  } b;
+  unsigned int word;
+} _ARM_FPSCR;
+
+#else /* __ORDER_BIG_ENDIAN__ */
+
+typedef union {
+  struct {
+    int N:1;
+    int Z:1;
+    int C:1;
+    int V:1;
+    unsigned int QC:1;
+    unsigned int AHP:1;
+    unsigned int DN:1;
+    int _dnm:25;
+  } b;
+  unsigned int word;
+} _ARM_FPSCR;
+
+#endif /* __ORDER_BIG_ENDIAN__ */
+
+#define Neon_Cumulative_Sat  __read_neon_cumulative_sat()
+/* We need a fake dependency to ensure correct ordering of asm
+   statements to preset the QC flag value, and Neon operators writing
+   to QC. */
+#define Set_Neon_Cumulative_Sat(x, depend)	\
+  __set_neon_cumulative_sat((x), (depend))
+
+#if defined(__aarch64__)
+static volatile int __read_neon_cumulative_sat (void) {
+    _ARM_FPSCR _afpscr_for_qc;
+    asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
+    return _afpscr_for_qc.b.QC;
+}
+#define __set_neon_cumulative_sat(x, depend) {				\
+    _ARM_FPSCR _afpscr_for_qc;						\
+    asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));		\
+    _afpscr_for_qc.b.QC = x;						\
+    asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
+  }
+#else
+static volatile int __read_neon_cumulative_sat (void) {
+    _ARM_FPSCR _afpscr_for_qc;
+    asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
+    return _afpscr_for_qc.b.QC;
+}
+
+#define __set_neon_cumulative_sat(x, depend) {				\
+    _ARM_FPSCR _afpscr_for_qc;						\
+    asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));		\
+    _afpscr_for_qc.b.QC = x;						\
+    asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \
+  }
+#endif
+
+/* Declare expected cumulative saturation results, one for each
+   size. They are defined and initialized in relevant test files.  */
+extern int VECT_VAR(expected_cumulative_sat, int, 8, 8);
+extern int VECT_VAR(expected_cumulative_sat, int, 16, 4);
+extern int VECT_VAR(expected_cumulative_sat, int, 32, 2);
+extern int VECT_VAR(expected_cumulative_sat, int, 64, 1);
+extern int VECT_VAR(expected_cumulative_sat, uint, 8, 8);
+extern int VECT_VAR(expected_cumulative_sat, uint, 16, 4);
+extern int VECT_VAR(expected_cumulative_sat, uint, 32, 2);
+extern int VECT_VAR(expected_cumulative_sat, uint, 64, 1);
+extern int VECT_VAR(expected_cumulative_sat, int, 8, 16);
+extern int VECT_VAR(expected_cumulative_sat, int, 16, 8);
+extern int VECT_VAR(expected_cumulative_sat, int, 32, 4);
+extern int VECT_VAR(expected_cumulative_sat, int, 64, 2);
+extern int VECT_VAR(expected_cumulative_sat, uint, 8, 16);
+extern int VECT_VAR(expected_cumulative_sat, uint, 16, 8);
+extern int VECT_VAR(expected_cumulative_sat, uint, 32, 4);
+extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2);
+
+/* Check cumulative saturation flag vs expected value.  */
+#define CHECK_CUMULATIVE_SAT(MSG,T,W,N,EXPECTED,COMMENT)		\
+  {									\
+    if (Neon_Cumulative_Sat !=						\
+	VECT_VAR(EXPECTED, T, W, N)) {					\
+      fprintf(stderr,							\
+	      "ERROR in %s (%s line %d in cumulative_sat '%s') at type %s: " \
+	      "got %d expected %d%s\n",					\
+	      MSG, __FILE__, __LINE__,					\
+	      STR(EXPECTED),						\
+	      STR(VECT_NAME(T, W, N)),					\
+	      Neon_Cumulative_Sat,					\
+	      VECT_VAR(EXPECTED, T, W, N),				\
+	      strlen(COMMENT) > 0 ? " " COMMENT : "");			\
+      abort();								\
+    }									\
+    fprintf(stderr, "CHECKED CUMULATIVE SAT %s\n", MSG);		\
+  }
+
+#define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment)		\
+  {									\
+    CHECK_CUMULATIVE_SAT(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
+									\
+    CHECK_CUMULATIVE_SAT(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, int, 64, 2, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+    CHECK_CUMULATIVE_SAT_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
+  }									\
+
+
+/* Clean output buffers before execution.  */
+static void clean_results (void)
+{
+  CLEAN(result, int, 8, 8);
+  CLEAN(result, int, 16, 4);
+  CLEAN(result, int, 32, 2);
+  CLEAN(result, int, 64, 1);
+  CLEAN(result, uint, 8, 8);
+  CLEAN(result, uint, 16, 4);
+  CLEAN(result, uint, 32, 2);
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, poly, 8, 8);
+  CLEAN(result, poly, 16, 4);
+  CLEAN(result, float, 32, 2);
+
+  CLEAN(result, int, 8, 16);
+  CLEAN(result, int, 16, 8);
+  CLEAN(result, int, 32, 4);
+  CLEAN(result, int, 64, 2);
+  CLEAN(result, uint, 8, 16);
+  CLEAN(result, uint, 16, 8);
+  CLEAN(result, uint, 32, 4);
+  CLEAN(result, uint, 64, 2);
+  CLEAN(result, poly, 8, 16);
+  CLEAN(result, poly, 16, 8);
+  CLEAN(result, float, 32, 4);
+
+#if defined(__aarch64__)
+  /* On AArch64, make sure to return DefaultNaN to have the same
+     results as on AArch32.  */
+  _ARM_FPSCR _afpscr_for_dn;
+  asm volatile ("mrs %0,fpcr" : "=r" (_afpscr_for_dn));
+  _afpscr_for_dn.b.DN = 1;
+  asm volatile ("msr fpcr,%0" : : "r" (_afpscr_for_dn));
+#endif
+}
+
+
+/* Helpers to declare variables of various types.   */
+#define DECL_VARIABLE(VAR, T1, W, N)		\
+  VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+
+/* Declare only 64 bits signed variants.  */
+#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, int, 8, 8);			\
+  DECL_VARIABLE(VAR, int, 16, 4);			\
+  DECL_VARIABLE(VAR, int, 32, 2);			\
+  DECL_VARIABLE(VAR, int, 64, 1)
+
+/* Declare only 64 bits unsigned variants.  */
+#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, uint, 8, 8);			\
+  DECL_VARIABLE(VAR, uint, 16, 4);			\
+  DECL_VARIABLE(VAR, uint, 32, 2);			\
+  DECL_VARIABLE(VAR, uint, 64, 1)
+
+/* Declare only 128 bits signed variants.  */
+#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, int, 8, 16);			\
+  DECL_VARIABLE(VAR, int, 16, 8);			\
+  DECL_VARIABLE(VAR, int, 32, 4);			\
+  DECL_VARIABLE(VAR, int, 64, 2)
+
+/* Declare only 128 bits unsigned variants.  */
+#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE(VAR, uint, 8, 16);			\
+  DECL_VARIABLE(VAR, uint, 16, 8);			\
+  DECL_VARIABLE(VAR, uint, 32, 4);			\
+  DECL_VARIABLE(VAR, uint, 64, 2)
+
+/* Declare all 64 bits variants.  */
+#define DECL_VARIABLE_64BITS_VARIANTS(VAR)	\
+  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE(VAR, poly, 8, 8);		\
+  DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE(VAR, float, 32, 2)
+
+/* Declare all 128 bits variants.  */
+#define DECL_VARIABLE_128BITS_VARIANTS(VAR)	\
+  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE(VAR, poly, 8, 16);		\
+  DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE(VAR, float, 32, 4)
+
+/* Declare all variants.  */
+#define DECL_VARIABLE_ALL_VARIANTS(VAR)		\
+  DECL_VARIABLE_64BITS_VARIANTS(VAR);		\
+  DECL_VARIABLE_128BITS_VARIANTS(VAR)
+
+/* Declare all signed variants.  */
+#define DECL_VARIABLE_SIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)
+
+/* Declare all unsigned variants.  */
+#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR)	\
+  DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
+  DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
+
+/* Helpers to initialize vectors.  */
+#define VDUP(VAR, Q, T1, T2, W, N, V)			\
+  VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
+
+#define VSET_LANE(VAR, Q, T1, T2, W, N, L, V)				\
+  VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V,			\
+						   VECT_VAR(VAR, T1, W, N), \
+						   L)
+
+/* We need to load initial values first, so rely on VLD1.  */
+#define VLOAD(VAR, BUF, Q, T1, T2, W, N)				\
+  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
+
+/* Helpers to call macros with 1 constant and 5 variable
+   arguments.  */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  MACRO(VAR, , int, s, 8, 8);					\
+  MACRO(VAR, , int, s, 16, 4);					\
+  MACRO(VAR, , int, s, 32, 2);					\
+  MACRO(VAR, , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  MACRO(VAR, , uint, u, 8, 8);					\
+  MACRO(VAR, , uint, u, 16, 4);					\
+  MACRO(VAR, , uint, u, 32, 2);					\
+  MACRO(VAR, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  MACRO(VAR, q, int, s, 8, 16);					\
+  MACRO(VAR, q, int, s, 16, 8);					\
+  MACRO(VAR, q, int, s, 32, 4);					\
+  MACRO(VAR, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR)	\
+  MACRO(VAR, q, uint, u, 8, 16);				\
+  MACRO(VAR, q, uint, u, 16, 8);				\
+  MACRO(VAR, q, uint, u, 32, 4);				\
+  MACRO(VAR, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR);	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
+
+/* Helpers to call macros with 2 constant and 5 variable
+   arguments.  */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, , int, s, 8, 8);					\
+  MACRO(VAR1, VAR2, , int, s, 16, 4);					\
+  MACRO(VAR1, VAR2, , int, s, 32, 2);					\
+  MACRO(VAR1, VAR2 , , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, , uint, u, 8, 8);					\
+  MACRO(VAR1, VAR2, , uint, u, 16, 4);					\
+  MACRO(VAR1, VAR2, , uint, u, 32, 2);					\
+  MACRO(VAR1, VAR2, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, q, int, s, 8, 16);					\
+  MACRO(VAR1, VAR2, q, int, s, 16, 8);					\
+  MACRO(VAR1, VAR2, q, int, s, 32, 4);					\
+  MACRO(VAR1, VAR2, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  MACRO(VAR1, VAR2, q, uint, u, 8, 16);					\
+  MACRO(VAR1, VAR2, q, uint, u, 16, 8);					\
+  MACRO(VAR1, VAR2, q, uint, u, 32, 4);					\
+  MACRO(VAR1, VAR2, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  MACRO(VAR1, VAR2, , poly, p, 8, 8);				\
+  MACRO(VAR1, VAR2, , poly, p, 16, 4)
+
+#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  MACRO(VAR1, VAR2, q, poly, p, 8, 16);				\
+  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+
+#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
+  TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
+  TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#endif /* _ARM_NEON_REF_H_ */
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c
@@ -0,0 +1,110 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR(expected_cumulative_sat,int,16,4) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,2) = 0;
+int VECT_VAR(expected_cumulative_sat,int,16,8) = 0;
+int VECT_VAR(expected_cumulative_sat,int,32,4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x19, 0x19, 0x19, 0x19 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x4, 0x4 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x10, 0x10, 0x10, 0x10,
+					0x10, 0x10, 0x10, 0x10 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xa, 0xa, 0xa, 0xa };
+
+/* Expected values of cumulative_saturation flag when saturation
+   occurs.  */
+int VECT_VAR(expected_cumulative_sat2,int,16,4) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,2) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,16,8) = 1;
+int VECT_VAR(expected_cumulative_sat2,int,32,4) = 1;
+
+/* Expected results when saturation occurs.  */
+VECT_VAR_DECL(expected2,int,16,4) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
+VECT_VAR_DECL(expected2,int,32,2) [] = { 0x7fffffff, 0x7fffffff };
+VECT_VAR_DECL(expected2,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
+					 0x7fff, 0x7fff, 0x7fff, 0x7fff };
+VECT_VAR_DECL(expected2,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
+					 0x7fffffff, 0x7fffffff };
+
+#define INSN_NAME vqdmulh
+#define TEST_MSG "VQDMULH_N"
+#define FNNAME1(NAME) exec_ ## NAME ## _n
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  int i;
+
+  /* vector_res = vqdmulh_n(vector,val), then store the result.  */
+#define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N));	\
+  VECT_VAR(vector_res, T1, W, N) =				\
+    INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),		\
+			L);					\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N),			\
+		    VECT_VAR(vector_res, T1, W, N));		\
+  CHECK_CUMULATIVE_SAT(TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxliary macros are necessary to expand INSN.  */
+#define TEST_VQDMULH_N1(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQDMULH_N(Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)	\
+  TEST_VQDMULH_N1(INSN_NAME, Q, T1, T2, W, N, L, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE(vector, int, 16, 4);
+  DECL_VARIABLE(vector, int, 32, 2);
+  DECL_VARIABLE(vector, int, 16, 8);
+  DECL_VARIABLE(vector, int, 32, 4);
+
+  DECL_VARIABLE(vector_res, int, 16, 4);
+  DECL_VARIABLE(vector_res, int, 32, 2);
+  DECL_VARIABLE(vector_res, int, 16, 8);
+  DECL_VARIABLE(vector_res, int, 32, 4);
+
+  clean_results ();
+
+  /* Initialize vector.  */
+  VDUP(vector, , int, s, 16, 4, 0x1000);
+  VDUP(vector, , int, s, 32, 2, 0x100023);
+  VDUP(vector, q, int, s, 16, 8, 0x1000);
+  VDUP(vector, q, int, s, 32, 4, 0x100045);
+
+  /* Choose multiplier arbitrarily.  */
+  TEST_VQDMULH_N(, int, s, 16, 4, 0xCF, expected_cumulative_sat, "");
+  TEST_VQDMULH_N(, int, s, 32, 2, 0x2344, expected_cumulative_sat, "");
+  TEST_VQDMULH_N(q, int, s, 16, 8, 0x80, expected_cumulative_sat, "");
+  TEST_VQDMULH_N(q, int, s, 32, 4, 0x5422, expected_cumulative_sat, "");
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, "");
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, "");
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, "");
+
+  /* Choose input values to trigger saturation.  */
+  VDUP(vector, , int, s, 16, 4, 0x8000);
+  VDUP(vector, , int, s, 32, 2, 0x80000000);
+  VDUP(vector, q, int, s, 16, 8, 0x8000);
+  VDUP(vector, q, int, s, 32, 4, 0x80000000);
+
+#define TEST_MSG2 " (check mul cumulative saturation)"
+  TEST_VQDMULH_N(, int, s, 16, 4, 0x8000, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH_N(, int, s, 32, 2, 0x80000000, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH_N(q, int, s, 16, 8, 0x8000, expected_cumulative_sat2, TEST_MSG2);
+  TEST_VQDMULH_N(q, int, s, 32, 4, 0x80000000, expected_cumulative_sat2, TEST_MSG2);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected2, TEST_MSG2);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2);
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc
+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc
@@ -0,0 +1,82 @@
+#define FNNAME1(NAME) exec_ ## NAME ##_n
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+  /* vector_res = vsxi_n(vector, vector2, val),
+     then store the result.  */
+#define TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V)				\
+  VECT_VAR(vector_res, T1, W, N) =					\
+    INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N),			\
+			VECT_VAR(vector2, T1, W, N),			\
+			V);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VSXI_N(INSN, Q, T1, T2, W, N, V)	\
+  TEST_VSXI_N1(INSN, Q, T1, T2, W, N, V)
+
+  DECL_VARIABLE_ALL_VARIANTS(vector);
+  DECL_VARIABLE_ALL_VARIANTS(vector2);
+  DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+  clean_results ();
+
+  /* Initialize input "vector" from "buffer".  */
+  TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+  /* Fill input vector2 with arbitrary values.  */
+  VDUP(vector2, , int, s, 8, 8, 2);
+  VDUP(vector2, , int, s, 16, 4, -4);
+  VDUP(vector2, , int, s, 32, 2, 3);
+  VDUP(vector2, , int, s, 64, 1, 100);
+  VDUP(vector2, , uint, u, 8, 8, 20);
+  VDUP(vector2, , uint, u, 16, 4, 30);
+  VDUP(vector2, , uint, u, 32, 2, 40);
+  VDUP(vector2, , uint, u, 64, 1, 2);
+  VDUP(vector2, , poly, p, 8, 8, 20);
+  VDUP(vector2, , poly, p, 16, 4, 30);
+  VDUP(vector2, q, int, s, 8, 16, -10);
+  VDUP(vector2, q, int, s, 16, 8, -20);
+  VDUP(vector2, q, int, s, 32, 4, -30);
+  VDUP(vector2, q, int, s, 64, 2, 24);
+  VDUP(vector2, q, uint, u, 8, 16, 12);
+  VDUP(vector2, q, uint, u, 16, 8, 3);
+  VDUP(vector2, q, uint, u, 32, 4, 55);
+  VDUP(vector2, q, uint, u, 64, 2, 3);
+  VDUP(vector2, q, poly, p, 8, 16, 12);
+  VDUP(vector2, q, poly, p, 16, 8, 3);
+
+  /* Choose shift amount arbitrarily.  */
+  TEST_VSXI_N(INSN_NAME, , int, s, 8, 8, 4);
+  TEST_VSXI_N(INSN_NAME, , int, s, 16, 4, 3);
+  TEST_VSXI_N(INSN_NAME, , int, s, 32, 2, 1);
+  TEST_VSXI_N(INSN_NAME, , int, s, 64, 1, 32);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 8, 8, 2);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 16, 4, 10);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 32, 2, 30);
+  TEST_VSXI_N(INSN_NAME, , uint, u, 64, 1, 3);
+  TEST_VSXI_N(INSN_NAME, , poly, p, 8, 8, 2);
+  TEST_VSXI_N(INSN_NAME, , poly, p, 16, 4, 10);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 8, 16, 5);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 16, 8, 3);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 32, 4, 20);
+  TEST_VSXI_N(INSN_NAME, q, int, s, 64, 2, 16);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 8, 16, 3);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 16, 8, 12);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 32, 4, 23);
+  TEST_VSXI_N(INSN_NAME, q, uint, u, 64, 2, 53);
+  TEST_VSXI_N(INSN_NAME, q, poly, p, 8, 16, 3);
+  TEST_VSXI_N(INSN_NAME, q, poly, p, 16, 8, 12);
+
+  CHECK_RESULTS (TEST_MSG, "");
+
+#ifdef EXTRA_TESTS
+  EXTRA_TESTS();
+#endif
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h
@@ -0,0 +1,94 @@
+extern void abort ();
+
+#define CVT(v) ((unsigned char)(v))
+
+static void __attribute__((noinline))
+check_args_8 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
+	      int a8)
+{
+  if (a0 != 0
+      || a1 != 1
+      || a2 != 2
+      || a3 != 3
+      || a4 != 4
+      || a5 != 5
+      || a6 != 6
+      || a7 != 7
+      || a8 != 8)
+    abort ();
+}
+
+static void __attribute__((noinline))
+check_args_24 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
+	       int a8, int a9, int a10)
+{
+  if (a0 != 0
+      || a1 != 1
+      || a2 != 2
+      || a3 != 3
+      || a4 != 4
+      || a5 != 5
+      || a6 != 6
+      || a7 != 7
+      || a8 != 8
+      || a9 != 9
+      || a10 != 10)
+    abort ();
+}
+
+void __attribute__ ((noinline))
+initialize_array (unsigned char *a, int len)
+{
+  int i;
+
+  for (i = 0; i < (len / 2); i++)
+    {
+      a[i] = i;
+      a[len - i - 1] = i;
+    }
+
+  return;
+}
+
+#define t_frame_pattern(name, local_size, callee_saved)\
+int \
+name (void)\
+{\
+  unsigned char a[local_size];\
+  initialize_array (a, local_size); \
+  __asm__ ("":::callee_saved); \
+  if (a[0] != a[local_size - 1] \
+      || a[0] != 0) \
+    return 0; \
+  if (a[local_size / 2 - 1] != a[local_size / 2] \
+      || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \
+    return 0; \
+  return 1; \
+}
+
+#define t_frame_pattern_outgoing(name, local_size, callee_saved, out_going_num, ...)\
+int \
+name (void)\
+{\
+  unsigned char a[local_size];\
+  initialize_array (a, local_size); \
+  __asm__ ("":::callee_saved); \
+  if (a[0] != a[local_size - 1] \
+      || a[0] != 0) \
+    return 0; \
+  if (a[local_size / 2 - 1] != a[local_size / 2] \
+      || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \
+    return 0; \
+  check_args_ ## out_going_num (a[0], a[1], a[2], a[3], a[4], a[5], a[6],\
+				a[7], __VA_ARGS__); \
+  return 1; \
+}
+
+#define t_frame_run(name) \
+int \
+main (int argc, char **argv) \
+{\
+  if (!name ())\
+    abort ();\
+  return 0;\
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX)		\
+int __attribute__ ((noinline))				\
+test_vst##STRUCT##SUFFIX ()				\
+{							\
+  BASE##_t src[ELTS * STRUCT];				\
+  BASE##_t dest[ELTS * STRUCT];				\
+  BASE##x##ELTS##x##STRUCT##_t vectors;			\
+  int i,j;						\
+  for (i = 0; i < STRUCT * ELTS; i++)			\
+    src [i] = (BASE##_t) 2*i + 1;			\
+  for (i = 0; i < STRUCT; i++)				\
+    vectors.val[i] = vld1##SUFFIX (&src[i*ELTS]);	\
+  asm volatile ("" : : : "memory");			\
+  vst##STRUCT##SUFFIX (dest, vectors);			\
+  asm volatile ("" : : : "memory");			\
+  for (i = 0; i < STRUCT; i++)				\
+    {							\
+      for (j = 0; j < ELTS; j++)			\
+        if (src[i*ELTS + j] != dest[i + STRUCT*j])	\
+          return 1;					\
+    }							\
+  return 0;						\
+}
+
+#define VARIANTS(VARIANT, STRUCT)	\
+VARIANT (uint8, 8, STRUCT, _u8)		\
+VARIANT (uint16, 4, STRUCT, _u16)	\
+VARIANT (uint32, 2, STRUCT, _u32)	\
+VARIANT (uint64, 1, STRUCT, _u64)	\
+VARIANT (int8, 8, STRUCT, _s8)		\
+VARIANT (int16, 4, STRUCT, _s16)	\
+VARIANT (int32, 2, STRUCT, _s32)	\
+VARIANT (int64, 1, STRUCT, _s64)	\
+VARIANT (poly8, 8, STRUCT, _p8)		\
+VARIANT (poly16, 4, STRUCT, _p16)	\
+VARIANT (float32, 2, STRUCT, _f32)	\
+VARIANT (float64, 1, STRUCT, _f64)	\
+VARIANT (uint8, 16, STRUCT, q_u8)	\
+VARIANT (uint16, 8, STRUCT, q_u16)	\
+VARIANT (uint32, 4, STRUCT, q_u32)	\
+VARIANT (uint64, 2, STRUCT, q_u64)	\
+VARIANT (int8, 16, STRUCT, q_s8)	\
+VARIANT (int16, 8, STRUCT, q_s16)	\
+VARIANT (int32, 4, STRUCT, q_s32)	\
+VARIANT (int64, 2, STRUCT, q_s64)	\
+VARIANT (poly8, 16, STRUCT, q_p8)	\
+VARIANT (poly16, 8, STRUCT, q_p16)	\
+VARIANT (float32, 4, STRUCT, q_f32)	\
+VARIANT (float64, 2, STRUCT, q_f64)
+
+/* Tests of vst2 and vst2q.  */
+VARIANTS (TESTMETH, 2)
+/* Tests of vst3 and vst3q.  */
+VARIANTS (TESTMETH, 3)
+/* Tests of vst4 and vst4q.  */
+VARIANTS (TESTMETH, 4)
+
+#define CHECK(BASE, ELTS, STRUCT, SUFFIX)	\
+  if (test_vst##STRUCT##SUFFIX () != 0)		\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  VARIANTS (CHECK, 2)
+  VARIANTS (CHECK, 3)
+  VARIANTS (CHECK, 4)
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c
@@ -8,11 +8,11 @@
 #include "vect-fmaxv-fminv.x"
 
 #define DEFN_SETV(type) \
-		set_vector_##type (pR##type a, type n)   \
-		{ 				         \
-		  int i;			         \
-		  for (i=0; i<16; i++)		         \
-		    a[i] = n;				 \
+		void set_vector_##type (pR##type a, type n)   \
+		{					      \
+		  int i;				      \
+		  for (i=0; i<16; i++)			      \
+		    a[i] = n;				      \
 		}
 
 #define DEFN_CHECKV(type) \
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
@@ -193,7 +193,6 @@
   return b;
 }
 /* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */
-/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */
 
 Int32x1
 test_corners_sisd_si (Int32x1 b)
@@ -207,7 +206,6 @@
   return b;
 }
 /* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */
-/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */
 
 
 
--- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
@@ -0,0 +1,21 @@
+/* Test vbslq_f64 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to ret.  */
+
+float32x4_t
+fold_me (float32x4_t a, float32x4_t b)
+{
+  uint32x4_t mask = {-1, -1, -1, -1};
+  return vbslq_f32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
--- a/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x
+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x
@@ -7,7 +7,7 @@
     for (i = 0; i < 8 / sizeof (TYPE); i++) \
       output[i] = *a; \
   } \
-  foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \
+  void foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \
   { \
     int i; \
     for (i = 0; i < 32 / sizeof (TYPE); i++) \
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
@@ -0,0 +1,21 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * with outgoing.
+     * total frame size > 512.
+       area except outgoing <= 512
+     * number of callee-saved reg >= 2.
+     * Split stack adjustment into two subtractions.
+       the first subtractions could be optimized into "stp !".  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
+t_frame_run (test10)
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
@@ -0,0 +1,105 @@
+/* Test vrnd_f64 works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+/* Bit offset to round mode field in FPCR.  */
+#define RMODE_START 22
+
+#define FPROUNDING_ZERO 3
+
+/* Set RMODE field of FPCR control register
+   to rounding mode passed.  */
+void __inline __attribute__ ((__always_inline__))
+set_rounding_mode (uint32_t mode)
+{
+  uint32_t r;
+
+  /* Read current FPCR.  */
+  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
+
+  /* Clear rmode.  */
+  r &= ~(3 << RMODE_START);
+  /* Calculate desired FPCR.  */
+  r |= mode << RMODE_START;
+
+  /* Write desired FPCR back.  */
+  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
+}
+
+float64x1_t __attribute__ ((noinline))
+compare_f64 (float64x1_t passed, float64_t expected)
+{
+  return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
+	  > __DBL_EPSILON__);
+}
+
+void __attribute__ ((noinline))
+run_round_tests (float64x1_t *tests,
+		 float64_t expectations[][6])
+{
+  int i;
+
+  for (i = 0; i < 6; i++)
+    {
+      if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i]))
+	abort ();
+      if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i]))
+	abort ();
+      if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i]))
+	abort ();
+      if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i]))
+	abort ();
+      if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i]))
+	abort ();
+      if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i]))
+	abort ();
+      if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i]))
+	abort ();
+    }
+}
+
+int
+main (int argc, char **argv)
+{
+  float64x1_t tests[6] =
+    {
+      vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5.  */
+      vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4.  */
+      vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6.  */
+      vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5.  */
+      vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4.  */
+      vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6.  */
+    };
+
+  float64_t expectations[7][6] =
+  {
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrnd - round towards zero.  */
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrndx - round using FPCR mode.  */
+    { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 },    /* vrndp - round to plus infinity.  */
+    { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 },   /* vrndn - round ties to even.  */
+    { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity.  */
+    { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },    /* vrndi - round using FPCR mode.  */
+    { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 },  /* vrnda - round ties away from 0.  */
+  };
+
+  /* Set floating point control register
+     to have predictable vrndx and vrndi behaviour.  */
+  set_rounding_mode (FPROUNDING_ZERO);
+
+  run_round_tests (tests, expectations);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
@@ -305,13 +305,28 @@
   return res;
 }
 
-/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "\\tfaddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
 
+float64_t
+test_vpaddd_f64 (float64x2_t a)
+{
+  return vpaddd_f64 (a);
+}
+
+/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 2 } } */
+
+int64_t
 test_vpaddd_s64 (int64x2_t a)
 {
   return vpaddd_s64 (a);
 }
 
+uint64_t
+test_vpaddd_u64 (uint64x2_t a)
+{
+  return vpaddd_u64 (a);
+}
+
 /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */
 
 uint64x1_t
@@ -498,7 +513,7 @@
 
 /* { dg-final { scan-assembler-times "\\tsqdmull\\td\[0-9\]+, s\[0-9\]+, v" 1 } } */
 
-int64x1_t
+int64_t
 test_vqdmulls_lane_s32 (int32_t a, int32x2_t b)
 {
   return vqdmulls_lane_s32 (a, b, 1);
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
@@ -0,0 +1,20 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * without outgoing.
+     * total frame size > 512.
+     * number of callee-saved reg == 2.
+     * split stack adjustment into two subtractions.
+       the second subtraction should use "stp !".  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test7, 700, "x19")
+t_frame_run (test7)
+
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  int8x8_t a = vabs_s8 (vdup_n_s8 (-128)); /* Should all be -128.  */
+  uint8x8_t b = vcltz_s8 (a); /* Should all be true i.e. -1. */
+  if (vget_lane_u8 (b, 1))
+    return 0;
+  abort ();
+}
+
--- a/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
@@ -34,6 +34,9 @@
   values [];
 };
 extern const struct locale_data _nl_C_LC_TIME __attribute__ ((visibility ("hidden")));
+extern void *memset (void *s, int c, size_t n);
+extern size_t strlen (const char *s);
+extern int __strncasecmp_l (const char *s1, const char *s2, size_t n, __locale_t locale);
 char *
 __strptime_internal (rp, fmt, tmp, statep , locale)
      const char *rp;
@@ -40,6 +43,7 @@
      const char *fmt;
      __locale_t locale;
      void *statep;
+     int tmp;
 {
   struct locale_data *const current = locale->__locales[__LC_TIME];
   const char *rp_backup;
@@ -124,5 +128,9 @@
 }
 char *
 __strptime_l (buf, format, tm , locale)
+     int buf;
+     int format;
+     int tm;
+     int locale;
 {
 }
--- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
@@ -0,0 +1,24 @@
+/* Test vbslq_f64 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Should fold out one half of the BSL, leaving just a BIC.  */
+
+float32x4_t
+half_fold_me (uint32x4_t mask)
+{
+  float32x4_t a = {0.0, 0.0, 0.0, 0.0};
+  float32x4_t b = {2.0, 4.0, 8.0, 16.0};
+  return vbslq_f32 (mask, a, b);
+
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c
@@ -0,0 +1,16 @@
+/* Verify:
+     * without outgoing.
+     * total frame size <= 512.
+     * number of callee-save reg >= 2.
+     * optimized code should use "stp !" for stack adjustment.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern (test11, 400, )
+t_frame_run (test11)
+
+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c
@@ -0,0 +1,47 @@
+/* Test vqneg_s64 intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vqneg_s64 (int64x1_t passed, int64_t expected)
+{
+  return vget_lane_s64 (vqneg_s64 (passed), 0) != expected;
+}
+
+int __attribute__ ((noinline))
+test_vqnegd_s64 (int64_t passed, int64_t expected)
+{
+  return vqnegd_s64 (passed) != expected;
+}
+
+/* { dg-final { scan-assembler-times "sqneg\\td\[0-9\]+, d\[0-9\]+" 2 } } */
+
+int
+main (int argc, char **argv)
+{
+  /* Basic test.  */
+  if (test_vqneg_s64 (vcreate_s64 (-1), 1))
+    abort ();
+  if (test_vqnegd_s64 (-1, 1))
+    abort ();
+
+  /* Negating max int64_t.  */
+  if (test_vqneg_s64 (vcreate_s64 (0x7fffffffffffffff), 0x8000000000000001))
+    abort ();
+  if (test_vqnegd_s64 (0x7fffffffffffffff, 0x8000000000000001))
+    abort ();
+
+  /* Negating min int64_t.
+     Note, exact negation cannot be represented as int64_t.  */
+  if (test_vqneg_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff))
+    abort ();
+  if (test_vqnegd_s64 (0x8000000000000000, 0x7fffffffffffffff))
+    abort ();
+
+  return 0;
+}
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -std=c99" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define VARIANTS(VARIANT)				\
+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8)		\
+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16)	\
+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32)	\
+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64)	\
+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8)		\
+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16)		\
+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32)		\
+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64)		\
+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32)	\
+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64)
+
+
+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX)	\
+int								\
+test_vget_low_ ##SUFFIX (BASETYPE *data)			\
+{								\
+  BASETYPE temp [NUM64];					\
+  TYPE128 vec = vld1q_##SUFFIX (data);				\
+  TYPE64 low = vget_low_##SUFFIX (vec);				\
+  vst1_##SUFFIX (temp, low);					\
+  for (int i = 0; i < NUM64; i++)				\
+    if (temp[i] != data[i])					\
+      return 1;							\
+  return 0;							\
+}
+
+VARIANTS (TESTMETH)
+
+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX)		\
+  if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0)	\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  uint8_t uint8_t_data[16] =
+      { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 };
+  uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 };
+  uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 };
+  uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL };
+  int8_t int8_t_data[16] =
+      { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 };
+  int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
+  int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
+  int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+  float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
+  float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
+
+  VARIANTS (CHECK);
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/ushr64_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/ushr64_1.c
@@ -42,7 +42,6 @@
   return vshrd_n_u64 (passed, 0) != expected;
 }
 
-/* { dg-final { scan-assembler-times "ushr\\td\[0-9\]+, d\[0-9\]+, 64" 2 } } */
 /* { dg-final { (scan-assembler-times "ushr\\td\[0-9\]+, d\[0-9\]+, 4" 2)  || \
    (scan-assembler-times "lsr\\tx\[0-9\]+, x\[0-9\]+, 4" 2) } } */
 /* { dg-final { scan-assembler-not "ushr\\td\[0-9\]+, d\[0-9\]+, 0" } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
@@ -0,0 +1,18 @@
+/* Verify:
+     * -fomit-frame-pointer.
+     * with outgoing.
+     * total frame size bigger than 512.
+     * number of callee-saved reg == 1.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test8, 700, , 8, a[8])
+t_frame_run (test8)
+
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define VARIANTS(VARIANT)			\
+VARIANT (uint8_t, , 8, uint8x8_t, _u8, 5)	\
+VARIANT (uint16_t, , 4, uint16x4_t, _u16, 3)	\
+VARIANT (uint32_t, , 2, uint32x2_t, _u32, 1)	\
+VARIANT (uint64_t, , 1, uint64x1_t, _u64, 0)	\
+VARIANT (int8_t, , 8, int8x8_t, _s8, 6)		\
+VARIANT (int16_t, , 4, int16x4_t, _s16, 2)	\
+VARIANT (int32_t, , 2, int32x2_t, _s32, 0)	\
+VARIANT (int64_t, , 1, int64x1_t, _s64, 0)	\
+VARIANT (poly8_t, , 8, poly8x8_t, _p8, 6)	\
+VARIANT (poly16_t, , 4, poly16x4_t, _p16, 2)	\
+VARIANT (float32_t, , 2, float32x2_t, _f32, 1)	\
+VARIANT (float64_t, , 1, float64x1_t, _f64, 0)	\
+VARIANT (uint8_t, q, 16, uint8x16_t, _u8, 11)	\
+VARIANT (uint16_t, q, 8, uint16x8_t, _u16, 7)	\
+VARIANT (uint32_t, q, 4, uint32x4_t, _u32, 2)	\
+VARIANT (uint64_t, q, 2, uint64x2_t, _u64, 1)	\
+VARIANT (int8_t, q, 16, int8x16_t, _s8, 13)	\
+VARIANT (int16_t, q, 8, int16x8_t, _s16, 5)	\
+VARIANT (int32_t, q, 4, int32x4_t, _s32, 3)	\
+VARIANT (int64_t, q, 2, int64x2_t, _s64, 0)	\
+VARIANT (poly8_t, q, 16, poly8x16_t, _p8, 14)	\
+VARIANT (poly16_t, q, 8, poly16x8_t, _p16, 6)	\
+VARIANT (float32_t, q, 4, float32x4_t, _f32, 2) \
+VARIANT (float64_t, q, 2, float64x2_t, _f64, 1)
+
+#define TESTMETH(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX)	\
+int							\
+test_vset_lane ##Q##SUFFIX (BASETYPE *data)		\
+{							\
+  BASETYPE temp [NUM];					\
+  TYPE vec = vld1##Q##SUFFIX (data);			\
+  TYPE vec2;						\
+  BASETYPE changed = data[INDEX] - INDEX;		\
+  int check;						\
+  vec = vset##Q##_lane##SUFFIX (changed, vec, INDEX);	\
+  asm volatile ("orr %0.16b, %1.16b, %1.16b"		\
+		: "=w"(vec2) : "w" (vec) : );		\
+  vst1##Q##SUFFIX (temp, vec2);				\
+  for (check = 0; check < NUM; check++)			\
+    {							\
+      BASETYPE desired = data[check];			\
+      if (check==INDEX) desired = changed;		\
+      if (temp[check] != desired)			\
+        return 1;					\
+    }							\
+  return 0;						\
+}
+
+VARIANTS (TESTMETH)
+
+#define CHECK(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX)		\
+  if (test_vset_lane##Q##SUFFIX (BASETYPE ## _ ## data) != 0)	\
+    abort ();
+
+int
+main (int argc, char **argv)
+{
+  uint8_t uint8_t_data[16] =
+      { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 };
+  uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 };
+  uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 };
+  uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL };
+  int8_t int8_t_data[16] =
+      { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 };
+  int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
+  int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
+  int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
+  poly8_t poly8_t_data[16] =
+      { 0, 7, 13, 18, 22, 25, 27, 28, 29, 31, 34, 38, 43, 49, 56, 64 };
+  poly16_t poly16_t_data[8] = { 11111, 2222, 333, 44, 5, 65432, 54321, 43210 };
+  float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
+  float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
+
+  VARIANTS (CHECK);
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
@@ -0,0 +1,19 @@
+/* Verify:
+     * with outgoing.
+     * total frame size <= 512.
+     * number of callee-save reg >= 2.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+#include "test_frame_common.h"
+
+t_frame_pattern_outgoing (test12, 400, , 8, a[8])
+t_frame_run (test12)
+
+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
+
+/* Check epilogue using write-back.  */
+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/lib/gcc.exp
+++ b/src/gcc/testsuite/lib/gcc.exp
@@ -126,7 +126,9 @@
     global GCC_UNDER_TEST
     global TOOL_OPTIONS
     global TEST_ALWAYS_FLAGS
-	
+    global flags_to_postpone
+    global board_info
+
     if {[target_info needs_status_wrapper] != "" && \
 	    [target_info needs_status_wrapper] != "0" && \
 	    [info exists gluefile] } {
@@ -162,8 +164,26 @@
 	set options [concat "{additional_flags=$TOOL_OPTIONS}" $options]
     }
 
+    # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is
+    # appended here to multilib_flags as it can be overridden by the latter
+    # if it was added earlier. After the target_compile, multilib_flags is
+    # restored to its orignal content.
+    set tboard [target_info name]
+    if {[board_info $tboard exists multilib_flags]} {
+        set orig_multilib_flags "[board_info [target_info name] multilib_flags]"
+        append board_info($tboard,multilib_flags) " $flags_to_postpone"
+    }
+
     lappend options "timeout=[timeout_value]"
     lappend options "compiler=$GCC_UNDER_TEST"
     set options [dg-additional-files-options $options $source]
-    return [target_compile $source $dest $type $options]
+    set return_val [target_compile $source $dest $type $options]
+
+    if {[board_info $tboard exists multilib_flags]} {
+        set board_info($tboard,multilib_flags) $orig_multilib_flags
+        set flags_to_postpone ""
+    }
+
+    return $return_val
 }
+
--- a/src/gcc/testsuite/lib/g++.exp
+++ b/src/gcc/testsuite/lib/g++.exp
@@ -288,6 +288,8 @@
     global gluefile wrap_flags
     global ALWAYS_CXXFLAGS
     global GXX_UNDER_TEST
+    global flags_to_postpone
+    global board_info
 
     if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
 	lappend options "libs=${gluefile}"
@@ -313,10 +315,25 @@
 	exec rm -f $rponame
     }
 
+    # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is
+    # appended here to multilib_flags as it can be overridden by the latter
+    # if it was added earlier. After the target_compile, multilib_flags is
+    # restored to its orignal content.
+    set tboard [target_info name]
+    if {[board_info $tboard exists multilib_flags]} {
+        set orig_multilib_flags "[board_info [target_info name] multilib_flags]"
+        append board_info($tboard,multilib_flags) " $flags_to_postpone"
+    }
+
     set options [dg-additional-files-options $options $source]
 
     set result [target_compile $source $dest $type $options]
 
+    if {[board_info $tboard exists multilib_flags]} {
+        set board_info($tboard,multilib_flags) $orig_multilib_flags
+        set flags_to_postpone ""
+    }
+
     return $result
 }
 
--- a/src/gcc/testsuite/lib/wrapper.exp
+++ b/src/gcc/testsuite/lib/wrapper.exp
@@ -34,9 +34,11 @@
 	# became true for dejagnu-1.4.4.  The set of warnings and code
 	# that gcc objects on may change, so just make sure -w is always
 	# passed to turn off all warnings.
+	unset_currtarget_info wrap_compile_flags
 	set_currtarget_info wrap_compile_flags \
 	    "$saved_wrap_compile_flags -w $flags"
 	set result [build_wrapper $filename]
+	unset_currtarget_info wrap_compile_flags
 	set_currtarget_info wrap_compile_flags "$saved_wrap_compile_flags"
 	if { $result != "" } {
 	    set gluefile [lindex $result 0]
--- a/src/gcc/testsuite/lib/compat.exp
+++ b/src/gcc/testsuite/lib/compat.exp
@@ -134,7 +134,6 @@
 		     "$options"]
     if ![${tool}_check_compile "$testcase $testname link" "" \
 	 $dest $comp_output] then {
-	unresolved "$testcase $testname execute $optstr"
 	return
     }
 
--- a/src/gcc/testsuite/lib/gcc-defs.exp
+++ b/src/gcc/testsuite/lib/gcc-defs.exp
@@ -54,14 +54,19 @@
     if { [info proc ${tool}-dg-prune] != "" } {
 	global target_triplet
 	set gcc_output [${tool}-dg-prune $target_triplet $gcc_output]
+	if [string match "*::unsupported::*" $gcc_output] then {
+	    regsub -- "::unsupported::" $gcc_output "" gcc_output
+	    unsupported "$testcase: $gcc_output"
+	    return 0
+	}
+    } else {
+	set unsupported_message [${tool}_check_unsupported_p $gcc_output]
+	if { $unsupported_message != "" } {
+	    unsupported "$testcase: $unsupported_message"
+	    return 0
+	}
     }
 
-    set unsupported_message [${tool}_check_unsupported_p $gcc_output]
-    if { $unsupported_message != "" } {
-	unsupported "$testcase: $unsupported_message"
-	return 0
-    }
-
     # remove any leftover LF/CR to make sure any output is legit
     regsub -all -- "\[\r\n\]*" $gcc_output "" gcc_output
 
--- a/src/gcc/testsuite/lib/gfortran.exp
+++ b/src/gcc/testsuite/lib/gfortran.exp
@@ -234,6 +234,8 @@
     global gluefile wrap_flags
     global ALWAYS_GFORTRANFLAGS
     global GFORTRAN_UNDER_TEST
+    global flags_to_postpone
+    global board_info
 
     if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
 	lappend options "libs=${gluefile}"
@@ -240,10 +242,27 @@
 	lappend options "ldflags=${wrap_flags}"
     }
 
+    # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is
+    # appended here to multilib_flags as it can be overridden by the latter
+    # if it was added earlier. After the target_compile, multilib_flags is
+    # restored to its orignal content.
+    set tboard [target_info name]
+    if {[board_info $tboard exists multilib_flags]} {
+        set orig_multilib_flags "[board_info [target_info name] multilib_flags]"
+        append board_info($tboard,multilib_flags) " $flags_to_postpone"
+    }
+
     lappend options "compiler=$GFORTRAN_UNDER_TEST"
     lappend options "timeout=[timeout_value]"
 
     set options [concat "$ALWAYS_GFORTRANFLAGS" $options]
     set options [dg-additional-files-options $options $source]
-    return [target_compile $source $dest $type $options]
+    set return_val [target_compile $source $dest $type $options]
+
+    if {[board_info $tboard exists multilib_flags]} {
+        set board_info($tboard,multilib_flags) $orig_multilib_flags
+        set flags_to_postpone ""
+    }
+
+    return $return_val
 }
--- a/src/gcc/testsuite/lib/target-supports.exp
+++ b/src/gcc/testsuite/lib/target-supports.exp
@@ -2274,7 +2274,7 @@
     }]
 }
 
-# Return 1 is this is an arm target using 32-bit instructions
+# Return 1 if this is an arm target using 32-bit instructions
 proc check_effective_target_arm32 { } {
     return [check_no_compiler_messages arm32 assembly {
 	#if !defined(__arm__) || (defined(__thumb__) && !defined(__thumb2__))
@@ -2283,10 +2283,10 @@
     }]
 }
 
-# Return 1 is this is an arm target not using Thumb
+# Return 1 if this is an arm target not using Thumb
 proc check_effective_target_arm_nothumb { } {
     return [check_no_compiler_messages arm_nothumb assembly {
-	#if (defined(__thumb__) || defined(__thumb2__))
+	#if !defined(__arm__) || (defined(__thumb__) || defined(__thumb2__))
 	#error FOO
 	#endif
     }]
@@ -3356,6 +3356,43 @@
     return $et_vect_shift_saved
 }
 
+proc check_effective_target_whole_vector_shift { } {
+    if { [istarget x86_64-*-*]
+	 || [istarget ia64-*-*]
+	 || ([check_effective_target_arm32]
+	     && [check_effective_target_arm_little_endian])
+	 || ([istarget mips*-*-*]
+	     && [check_effective_target_mips_loongson]) } {
+	set answer 1
+    } else {
+	set answer 0
+    }
+
+    verbose "check_effective_target_vect_long: returning $answer" 2
+    return $answer
+}
+
+# Return 1 if the target supports vector bswap operations.
+
+proc check_effective_target_vect_bswap { } {
+    global et_vect_bswap_saved
+
+    if [info exists et_vect_bswap_saved] {
+	verbose "check_effective_target_vect_bswap: using cached result" 2
+    } else {
+	set et_vect_bswap_saved 0
+	if { [istarget aarch64*-*-*]
+             || ([istarget arm*-*-*]
+                && [check_effective_target_arm_neon])
+	   } {
+	   set et_vect_bswap_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2
+    return $et_vect_bswap_saved
+}
+
 # Return 1 if the target supports hardware vector shift operation for char.
 
 proc check_effective_target_vect_shift_char { } {
@@ -3554,8 +3591,7 @@
     } else {
         set et_vect_perm_saved 0
         if { [is-effective-target arm_neon_ok]
-	     || ([istarget aarch64*-*-*]
-		 && [is-effective-target aarch64_little_endian])
+	     || [istarget aarch64*-*-*]
 	     || [istarget powerpc*-*-*]
              || [istarget spu-*-*]
 	     || [istarget i?86-*-*]
@@ -5242,16 +5278,26 @@
     return $flags
 }
 
+if {![info exists flags_to_postpone]} {
+    set flags_to_postpone ""
+}
+
 # Add to FLAGS the flags needed to enable functions to bind locally
 # when using pic/PIC passes in the testsuite.
+proc add_options_for_bind_pic_locally { flags } {
+    global flags_to_postpone
 
-proc add_options_for_bind_pic_locally { flags } {
+    # Instead of returning 'flags' with the -fPIE or -fpie appended, we save it
+    # in 'flags_to_postpone' and append it later in gcc_target_compile procedure in
+    # order to make sure that the multilib_flags doesn't override this.
+
     if {[check_no_compiler_messages using_pic2 assembly {
         #if __PIC__ != 2
         #error FOO
         #endif
     }]} {
-	return "$flags -fPIE"
+        set flags_to_postpone "-fPIE"
+        return $flags
     }
     if {[check_no_compiler_messages using_pic1 assembly {
         #if __PIC__ != 1
@@ -5258,9 +5304,9 @@
         #error FOO
         #endif
     }]} {
-	return "$flags -fpie"
+        set flags_to_postpone "-fpie"
+        return $flags
     }
-
     return $flags
 }
 
--- a/src/gcc/testsuite/ChangeLog.linaro
+++ b/src/gcc/testsuite/ChangeLog.linaro
@@ -0,0 +1,1686 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-10-28 Yvan Roux  <yvan.roux@linaro.org>
+	   Sebastian Pop  <s.pop@samsung.com>
+
+	Backport from trunk r221007, r221675, r222011.
+	2015-04-11  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/65735
+	* gcc.c-torture/compile/pr65735.c: New test.
+
+	2015-03-25  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65177
+	* gcc.dg/tree-ssa/ssa-dom-thread-10.c: New.
+
+	2015-02-26  Sebastian Pop  <s.pop@samsung.com>
+
+	PR tree-optimization/65048
+	* gcc.dg/tree-ssa/ssa-dom-thread-9.c: New.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-06-02  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r217753.
+	2014-11-19  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/63843
+	* gcc.c-torture/execute/pr63843.c: New test.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-04-09  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>
+
+	Backport from trunk r219745.
+	2015-01-16  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/64263
+	* gcc.target/aarch64/pr64263_1.c: New test.
+
+2015-04-07  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218658.
+	2014-12-12  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	* gcc.dg/pr64007.c: New test.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218961.
+	2014-12-19  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/eon_1.c: New test.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218530.
+	2014-12-09  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vabs_intrinsic_2.c: New test.
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218868.
+	2014-12-18  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/ushr64_1.c: Remove scan-assembler "ushr...64".
+
+2015-04-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218855.
+	2014-12-18  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/62178
+	* gcc.target/aarch64/pr62178.c: New test.
+
+2015-03-24  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+	Backport from trunk r220808.
+        * gcc.dg/pr64935-1.c, gcc.dg/pr64935-2.c: New tests.
+
+2015-03-18  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218012.
+	2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/fuse_adrp_add_1.c: New test.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218503.
+	2014-12-08  Sandra Loosemore  <sandra@codesourcery.com>
+
+	* gcc.target/aarch64/bics_4.c: New.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r218486.
+	2014-12-08  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.target/aarch64/bics_3.c : New testcase.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217938.
+	2014-11-21  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/vect_ctz_1.c: New testcase.
+
+2015-03-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217852.
+	2014-11-20  Tejas Belagod  <tejas.belagod@arm.com>
+
+	* gcc.target/aarch64/symbol-range.c: New.
+	* gcc.target/aarch64/symbol-range-tiny.c: New.
+
+2015-03-06  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r218463, r219764, r219765, r219767, r219914,
+	r219917, r219918, r219919, r219920, r219921, r219922, r219930,
+	r219931, r219932, r219934, r219937, r219938, r219939, r219940,
+	r219941, r219942, r219943, r219944, r219945, r219946, r219947,
+	r219948, r219949, r219950, r220117, r220118, r220119, r220121,
+	r220122, r220123, r220124, r220126, r220353.
+
+	2015-02-02  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+	(_ARM_FPSRC): Add DN and AHP fields.
+	(clean_results): Force DN=1 on AArch64.
+	* gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vhadd.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vhsub.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmax.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmin.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vrhadd.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vpaddl.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vpadal.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmvn.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmovl.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vpadd.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vpmax.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vpmin.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmlX_n.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmla_n.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmls_n.c: New file.
+
+	2015-01-26  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vXXXhn.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vraddhn.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vrsubhn.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vsubhn.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Use code from
+	vXXXhn.inc.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmull_n.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmull_lane.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmull.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmulh_n.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmulh_lane.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmulh.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmull_n.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmull.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmul_n.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmovn.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vXXXw.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vsubw.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vaddw.c: Use code from
+	vXXXw.inc.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vXXXl.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vsubl.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vaddl.c: Use code from
+	vXXXl.inc.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vsli_n.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vsri_n.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_n.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlal_n.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_n.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlXl_lane.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlal_lane.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlsl_lane.c: New file.
+
+	2015-01-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlXl.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlal.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqdmlsl.c: New file.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmlXl_n.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmlal_n.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmlsl_n.c: New file.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmlXl_lane.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmlal_lane.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmlsl_lane.c: New file.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmlXl.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmlal.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmlsl.c: New file.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Use code from
+	vshuffle.inc.
+	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Use code from
+	vshuffle.inc.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmlX_lane.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmla_lane.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmls_lane.c: New file.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmlX.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmla.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmls.c: New file.
+
+	2015-01-20  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: New file.
+
+	2015-01-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c: New file.
+
+	2015-01-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (CHECK):
+	Add trace.
+	(CHECK_FP): Likewise.
+	(CHECK_CUMULATIVE_SAT): Likewise.
+
+	2015-01-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+	(Set_Neon_Cumulative_Sat): Add parameter.
+	(__set_neon_cumulative_sat): Support new parameter.
+	* gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc
+	(TEST_BINARY_SAT_OP1): Call Set_Neon_Cumulative_Sat with new
+	argument.
+	* gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc
+	(TEST_UNARY_SAT_OP1): Call Set_Neon_Cumulative_Sat with new
+	argument.
+
+	2014-12-07  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: Actually execute
+	the test.
+	* gcc.target/aarch64/advsimd-intrinsics/vaddl.c: Actually execute
+	the test. Fix expected output.
+	* gcc.target/aarch64/advsimd-intrinsics/vaddw.c: Likewise.
+
+2015-03-06  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r217707.
+	2014-11-18  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/arm/neon/vbicQs16.c: Regenerate.
+	* gcc.target/arm/neon/vbicQs32.c: Likewise.
+	* gcc.target/arm/neon/vbicQs64.c: Likewise.
+	* gcc.target/arm/neon/vbicQs8.c: Likewise.
+	* gcc.target/arm/neon/vbicQu16.c: Likewise.
+	* gcc.target/arm/neon/vbicQu32.c: Likewise.
+	* gcc.target/arm/neon/vbicQu64.c: Likewise.
+	* gcc.target/arm/neon/vbicQu8.c: Likewise.
+	* gcc.target/arm/neon/vbics16.c: Likewise.
+	* gcc.target/arm/neon/vbics32.c: Likewise.
+	* gcc.target/arm/neon/vbics64.c: Likewise.
+	* gcc.target/arm/neon/vbics8.c: Likewise.
+	* gcc.target/arm/neon/vbicu16.c: Likewise.
+	* gcc.target/arm/neon/vbicu32.c: Likewise.
+	* gcc.target/arm/neon/vbicu64.c: Likewise.
+	* gcc.target/arm/neon/vbicu8.c: Likewise.
+	* gcc.target/arm/neon/vornQs16.c: Likewise.
+	* gcc.target/arm/neon/vornQs32.c: Likewise.
+	* gcc.target/arm/neon/vornQs64.c: Likewise.
+	* gcc.target/arm/neon/vornQs8.c: Likewise.
+	* gcc.target/arm/neon/vornQu16.c: Likewise.
+	* gcc.target/arm/neon/vornQu32.c: Likewise.
+	* gcc.target/arm/neon/vornQu64.c: Likewise.
+	* gcc.target/arm/neon/vornQu8.c: Likewise.
+	* gcc.target/arm/neon/vorns16.c: Likewise.
+	* gcc.target/arm/neon/vorns32.c: Likewise.
+	* gcc.target/arm/neon/vorns64.c: Likewise.
+	* gcc.target/arm/neon/vorns8.c: Likewise.
+	* gcc.target/arm/neon/vornu16.c: Likewise.
+	* gcc.target/arm/neon/vornu32.c: Likewise.
+	* gcc.target/arm/neon/vornu64.c: Likewise.
+	* gcc.target/arm/neon/vornu8.c: Likewise.
+
+2015-03-06  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r217706.
+	2014-11-18  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vcls.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vcnt.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vcombine.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vcreate.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vcvt.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vext.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vget_high.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vget_low.c: New test.
+
+2015-03-06  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r216663.
+	2014-10-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* lib/target-supports.exp
+	(check_effective_target_arm_crypto_ok_nocache): Remove declaration for
+	vaeseq_u8.
+	(check_effective_target_arm_neon_fp16_ok_nocache): Remove declaration
+	for vcvt_f16_f32.
+	(check_effective_target_arm_neonv2_ok_nocache): Remove declaration for
+	vfma_f32.
+
+2015-03-05  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218115, r218733, r218746, r220491.
+	2015-02-06  Sebastian Pop  <s.pop@samsung.com>
+		    Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/64878
+	* testsuite/gcc.dg/tree-ssa/ssa-dom-thread-8.c: New.
+
+	2014-12-15  Richard Biener  <rguenther@suse.de>
+
+	PR middle-end/64246
+	* gnat.dg/opt46.adb: New testcase.
+	* gnat.dg/opt46.ads: Likewise.
+	* gnat.dg/opt46_pkg.adb: Likewise.
+	* gnat.dg/opt46_pkg.ads: Likewise.
+
+	2014-12-15  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64284
+	* gcc.dg/torture/pr64284.c: New testcase.
+
+	2014-11-27  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/64083
+	* gcc.dg/torture/pr64083.c: New testcase.
+
+2015-03-05  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r220860.
+	2015-02-20  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/sisd-shft-neg_1.c: New test.
+
+2015-03-04  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	Backport from trunk r215722.
+	2014-09-30  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/simd/vqdmullh_laneq_s16.c: New.
+	* gcc.target/aarch64/simd/vqdmulls_laneq_s32.c: Likewise.
+	* gcc.target/aarch64/simd/vqdmulls_lane_s32.c: Fix return type.
+	* gcc.target/aarch64/scalar_intrinsics.c (test_vqdmulls_s32):  Fix
+	return type.
+
+2015-03-04  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	Backport from trunk r215612.
+	2014-09-25  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/simd/vqshlb_1.c: New.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217185, r217186.
+	2014-11-06  Hale Wang  <hale.wang@arm.com>
+
+	* gcc.target/arm/small-multiply-m0-1.c: New test for
+	* gcc.target/arm/small-multiply-m0-2.c: Likewise.
+	* gcc.target/arm/small-multiply-m0-3.c: Likewise.
+	* gcc.target/arm/small-multiply-m0plus-1.c: New test for
+	* gcc.target/arm/small-multiply-m0plus-2.c: Likewise.
+	* gcc.target/arm/small-multiply-m0plus-3.c: Likewise.
+	* gcc.target/arm/small-multiply-m1-1.c: New test for
+	* gcc.target/arm/small-multiply-m1-2.c: Likewise.
+	* gcc.target/arm/small-multiply-m1-3.c: Likewise.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217118.
+	2014-11-05  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.dg/asr-div1.c: New testcase.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217228.
+	2014-11-07  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.dg/tree-ssa/20040204-1.c: Add aarch64*-*-* to the list.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r219583.
+	2015-01-14  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/64460
+	* gcc.target/arm/pr64460_1.c: New test.
+
+2015-02-10  Michael Collison  <michael.collison@linaro.org>
+
+	Backport from trunk r217431.
+	2014-11-12  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/lr_free_1.c: New testcase for -fomit-frame-pointer.
+	* gcc.target/aarch64/lr_free_2.c: New testcase for leaf
+	-fno-omit-frame-pointer.
+
+2015-02-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	Backport from trunk r216675.
+	2014-10-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/arm/aapcs/abitest.h: Declare memcpy.
+
+2015-02-04  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from trunk r216640-r216661.
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vmul.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vldX.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vclz.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vaddw.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vaddl.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vaddhn.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vabdl.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vabd.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vabal.c: New file.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/binary_sat_op.inc: New
+	file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqadd.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vqsub.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc: New
+	file.
+	* gcc.target/aarch64/advsimd-intrinsics/vqabs.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vqneg.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vcage.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vcagt.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vcale.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vcalt.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vceq.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vcge.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vcgt.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vcle.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vclt.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/binary_op.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vadd.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vand.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vbic.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/veor.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vorn.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vorr.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vsub.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/unary_op.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vabs.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vneg.c: Likewise.
+
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/arm/README.advsimd-intrinsics: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/README: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h:
+	Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp:
+	Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vaba.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
+	* gcc.target/aarch64/advsimd-intrinsics/vshl.c: Likewise.
+
+2015-02-05  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	Backport from trunk r217230.
+	* gcc.target/arm/lp1243022.c (xhci_test_trb_in_td): Add return type.
+	(xhci_check_trb_in_td_math): Likewise.
+
+	2014-11-07  Jiong Wang  <jiong.wang@arm.com>
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2015-01-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r218451.
+	2014-12-06  James Greenhalgh  <james.greenhalgh@arm.com>
+		    Sebastian Pop  <s.pop@samsung.com>
+		    Brian Rzycki  <b.rzycki@samsung.com>
+
+	PR tree-optimization/54742
+	* gcc.dg/tree-ssa/ssa-dom-thread-6.c: New test.
+	* gcc.dg/tree-ssa/ssa-dom-thread-7.c: New test.
+
+2015-01-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211075.
+	2014-04-30  Alan Lawrence  <alan.lawrence@arm.com>
+
+	gcc.target/arm/simd/vrev16p8_1.c: New file.
+	gcc.target/arm/simd/vrev16qp8_1.c: New file.
+	gcc.target/arm/simd/vrev16qs8_1.c: New file.
+	gcc.target/arm/simd/vrev16qu8_1.c: New file.
+	gcc.target/arm/simd/vrev16s8_1.c: New file.
+	gcc.target/arm/simd/vrev16u8_1.c: New file.
+	gcc.target/arm/simd/vrev32p16_1.c: New file.
+	gcc.target/arm/simd/vrev32p8_1.c: New file.
+	gcc.target/arm/simd/vrev32qp16_1.c: New file.
+	gcc.target/arm/simd/vrev32qp8_1.c: New file.
+	gcc.target/arm/simd/vrev32qs16_1.c: New file.
+	gcc.target/arm/simd/vrev32qs8_1.c: New file.
+	gcc.target/arm/simd/vrev32qu16_1.c: New file.
+	gcc.target/arm/simd/vrev32qu8_1.c: New file.
+	gcc.target/arm/simd/vrev32s16_1.c: New file.
+	gcc.target/arm/simd/vrev32s8_1.c: New file.
+	gcc.target/arm/simd/vrev32u16_1.c: New file.
+	gcc.target/arm/simd/vrev32u8_1.c: New file.
+	gcc.target/arm/simd/vrev64f32_1.c: New file.
+	gcc.target/arm/simd/vrev64p16_1.c: New file.
+	gcc.target/arm/simd/vrev64p8_1.c: New file.
+	gcc.target/arm/simd/vrev64qf32_1.c: New file.
+	gcc.target/arm/simd/vrev64qp16_1.c: New file.
+	gcc.target/arm/simd/vrev64qp8_1.c: New file.
+	gcc.target/arm/simd/vrev64qs16_1.c: New file.
+	gcc.target/arm/simd/vrev64qs32_1.c: New file.
+	gcc.target/arm/simd/vrev64qs8_1.c: New file.
+	gcc.target/arm/simd/vrev64qu16_1.c: New file.
+	gcc.target/arm/simd/vrev64qu32_1.c: New file.
+	gcc.target/arm/simd/vrev64qu8_1.c: New file.
+	gcc.target/arm/simd/vrev64s16_1.c: New file.
+	gcc.target/arm/simd/vrev64s32_1.c: New file.
+	gcc.target/arm/simd/vrev64s8_1.c: New file.
+	gcc.target/arm/simd/vrev64u16_1.c: New file.
+	gcc.target/arm/simd/vrev64u32_1.c: New file.
+	gcc.target/arm/simd/vrev64u8_1.c: New file.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209620.
+	2014-04-22  Vidya Praveen  <vidyapraveen@arm.com>
+
+	* gcc.target/aarch64/cvtf_1.c: New.
+
+2015-01-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217362.
+	2014-11-11  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/vbslq_f64_1.c: New.
+	* gcc.target/aarch64/vbslq_f64_2.c: Likewise.
+	* gcc.target/aarch64/vbslq_u64_1.c: Likewise.
+	* gcc.target/aarch64/vbslq_u64_2.c: Likewise.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r217742.
+	2014-11-18  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR target/63937
+	* gcc.dg/memset-2.c: New.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216638.
+	2014-10-24  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* lib/wrapper.exp ({tool}_maybe_build_wrapper): Clear
+	wrap_compile_flags before setting it.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216544.
+	2014-10-22  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/pic-constantpool1.c: Add explicit declaration.
+	* gcc.target/aarch64/pic-symrefplus.c: Likewise.
+	* gcc.target/aarch64/reload-valid-spoff.c: Likewise.
+	* gcc.target/aarch64/vect.x: Likewise.
+	* gcc.target/aarch64/vect-ld1r.x: Add return type.
+	* gcc.target/aarch64/vect-fmax-fmin.c: Likewise.
+	* gcc.target/aarch64/vect-fp.c: Likewise.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216543.
+	2014-10-22  Jiong Wang  <jiong.wang@arm.com>
+
+	* lib/compat.exp (compat-run): Remove "unresolved".
+	* lib/gcc-defs.exp (${tools}_check_compile): Update code logic for
+	unsupported testcase.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r216517.
+	2014-10-21  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/arm/20031108-1.c (Proc_7): Add explicit declaration.
+	(Proc_1): Add return type.
+	* gcc.target/arm/cold-lc.c (show_stack): Add explict declaration.
+	* gcc.target/arm/neon-modes-2.c (foo): Likewise.
+	* gcc.target/arm/pr43920-2.c (lseek): Likewise.
+	* gcc.target/arm/pr44788.c (foo): Likewise.
+	* gcc.target/arm/pr55642.c (abs): Likewise.
+	* gcc.target/arm/pr58784.c (f): Likewise.
+	* gcc.target/arm/pr60650.c (foo1, foo2): Likewise.
+	* gcc.target/arm/vfp-ldmdbs.c (bar): Likewise.
+	* gcc.target/arm/vfp-ldmias.c (bar): Likewise.
+	* gcc.target/arm/pr60650-2.c (fn1, fn2): Add return type and add type
+	for local variables.
+	* lib/target-supports.exp
+	(check_effective_target_arm_crypto_ok_nocache): Add declaration for
+	vaeseq_u8.
+	(check_effective_target_arm_neon_fp16_ok_nocache): Add declaration for
+	vcvt_f16_f32.
+	(check_effective_target_arm_neonv2_ok_nocache): Add declaration for
+	vfma_f32.
+	* gcc.target/arm/pr51968.c: Add -Wno-implicit-function-declaration.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+ 
+	Backport from trunk r215071.
+	2014-09-09  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/int_comparisons_1.c: Tighten regexp.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215540.
+	2014-09-24  Zhenqiang Chen  <zhenqiang.chen@arm.com>
+
+	* gcc.target/arm/pr63210.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215475.
+	2014-09-22  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.dg/vect/vect-reduc-or_1.c: New test.
+	* gcc.dg/vect/vect-reduc-or_2.c: Likewise.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215473.
+	2014-09-22  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* lib/target-supports.exp (check_effective_target_whole_vector_shift):
+	New.
+
+	* gcc.dg/vect/vect-reduc-mul_1.c: New test.
+	* gcc.dg/vect/vect-reduc-mul_2.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215177.
+	2014-09-11  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vset_lane_1.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215129.
+	2014-09-10  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vstN_1.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215126.
+	2014-09-10  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vldN_lane_1.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215078.
+	2014-09-09  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vldN_dup_1.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215077.
+	2014-09-09  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vld1-vst1_1.c: Rewrite to test all variants.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215072.
+	2014-09-09  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vldN_1.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215047.
+	2014-09-09  Tony Wang  <tony.wang@arm.com>
+
+	* gcc.target/arm/xordi3-opt.c: Disable this
+	test case for thumb1 target.
+	* gcc.target/arm/iordi3-opt.c: Ditto.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215046.
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/61749
+	* gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New test.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214950.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vget_high_1.c: New test.
+	* gcc.target/aarch64/vget_low_1.c: Likewise.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214948.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/int_comparisons.x: New file.
+	* gcc.target/aarch64/simd/int_comparisons_1.c: New test.
+	* gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.
+
+2014-12-04  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213382.
+	2014-07-31  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/scalar_intrinsics.c (test_vpaddd_f64): New.
+	(test_vpaddd_s64): Likewise.
+	(test_vpaddd_s64): Likewise.
+	* gcc.target/aarch64/simd/vpaddd_f64: New.
+	* gcc.target/aarch64/simd/vpaddd_s64: New.
+	* gcc.target/aarch64/simd/vpaddd_u64: New.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-10-08  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214825, r214826, r215085.
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/vect-lceilf_1.c: Make input and output arrays global
+	and 16-byte aligned.
+	* gcc.target/arm/vect-lfloorf_1.c: Likewise.
+	* gcc.target/arm/vect-lroundf_1.c: Likewise.
+	* gcc.target/arm/vect-rounding-btruncf.c: Likewise.
+	* gcc.target/arm/vect-rounding-ceilf.c: Likewise.
+	* gcc.target/arm/vect-rounding-floorf.c: Likewise.
+	* gcc.target/arm/vect-rounding-roundf.c: Likewise.
+
+	2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/62275
+	* gcc.target/arm/vect-lceilf_1.c: New test.
+	* gcc.target/arm/vect-lfloorf_1.c: Likewise.
+	* gcc.target/arm/vect-lroundf_1.c: Likewise.
+
+	2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/62275
+	* gcc.target/arm/lceil-vcvt_1.c: New test.
+	* gcc.target/arm/lfloor-vcvt_1.c: Likewise.
+	* gcc.target/arm/lround-vcvt_1.c: Likewise.
+
+2014-10-06  Venkataramanan Kumar  <venkataramanan.kumar@linaro.org>
+
+	Backport from trunk r214943.
+	2014-09-05  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/vrbit_1.c: New test.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215385.
+	2014-09-19  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.dg/ssp-3.c: New.
+	* gcc.dg/ssp-4.c: Likewise.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215136.
+	2014-09-10  Xinliang David Li  <davidxl@google.com>
+
+	PR target/63209
+	* gcc.c-torture/execute/pr63209.c: New test.
+
+2014-10-06  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215067.
+	2014-09-09  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/arm/vect-copysignf.c: New testcase.
+
+2014-10-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r215050, r215051, r215052, r215053, r215054.
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/vfp-1.c: Updated expected assembly.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/vfp-1.c: Updated expected assembly.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/vfp-1.c: Updated expected assembly.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/vfp-1.c: Updated expected assembly.
+
+	2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/pr51835.c: Update expected assembly.
+	* gcc.target/arm/vfp-1.c: Likewise.
+	* gcc.target/arm/vfp-ldmdbd.c: Likewise.
+	* gcc.target/arm/vfp-ldmdbs.c: Likewise.
+	* gcc.target/arm/vfp-ldmiad.c: Likewise.
+	* gcc.target/arm/vfp-ldmias.c: Likewise.
+	* gcc.target/arm/vfp-stmdbd.c: Likewise.
+	* gcc.target/arm/vfp-stmdbs.c: Likewise.
+	* gcc.target/arm/vfp-stmiad.c: Likewise.
+	* gcc.target/arm/vfp-stmias.c: Likewise.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r214526.
+	2014-08-26  Joseph Myers  <joseph@codesourcery.com>
+
+	PR target/60606
+	PR target/61330
+	* gcc.dg/torture/pr60606-1.c, gcc.target/arm/pr60606-2.c,
+	gcc.target/arm/pr60606-3.c, gcc.target/arm/pr60606-4.c: New tests.
+
+2014-09-03  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213659.
+	2014-08-06  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vdup_n_2.c: New test.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213701.
+	2014-08-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.dg/pr61756.c: Remove arm-specific dg-options.
+
+2014-08-26  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213488, r213489.
+	2014-08-01  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/legitimize_stack_var_before_reload_1.c: New
+	testcase.
+
+2014-08-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212927.
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.dg/ira-shrinkwrap-prep-1.c (target): Add arm_nothumb.
+	* gcc.dg/ira-shrinkwrap-prep-2.c (target): Likewise.
+	* gcc.dg/pr10474.c (target): Likewise.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213555.
+	2014-08-04  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR target/61713
+	* gcc.dg/pr61756.c: New test.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r213376.
+	2014-07-31  Charles Baylis  <charles.baylis@linaro.org>
+
+	PR target/61948
+	* gcc.target/arm/pr61948.c: New test case.
+
+2014-08-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212959, r212976, r212999, r213000.
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/test_frame_1.c: Match optimized instruction
+	sequences.
+	* gcc.target/aarch64/test_frame_2.c: Likewise.
+	* gcc.target/aarch64/test_frame_4.c: Likewise.
+	* gcc.target/aarch64/test_frame_6.c: Likewise.
+	* gcc.target/aarch64/test_frame_7.c: Likewise.
+	* gcc.target/aarch64/test_frame_8.c: Likewise.
+	* gcc.target/aarch64/test_frame_10.c: Likewise.
+
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/test_frame_1.c: Match optimized instruction
+	sequences.
+	* gcc.target/aarch64/test_frame_10.c: Likewise.
+	* gcc.target/aarch64/test_frame_2.c: Likewise.
+	* gcc.target/aarch64/test_frame_4.c: Likewise.
+	* gcc.target/aarch64/test_frame_6.c: Likewise.
+	* gcc.target/aarch64/test_frame_7.c: Likewise.
+	* gcc.target/aarch64/test_frame_8.c: Likewise.
+	* gcc.target/aarch64/test_fp_attribute_1.c: Likewise.
+
+	2014-07-24  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/test_frame_12.c: Match optimized instruction
+	sequences.
+
+	2014-07-23  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/test_frame_common.h: New file.
+	* gcc.target/aarch64/test_frame_1.c: Likewise.
+	* gcc.target/aarch64/test_frame_2.c: Likewise.
+	* gcc.target/aarch64/test_frame_3.c: Likewise.
+	* gcc.target/aarch64/test_frame_4.c: Likewise.
+	* gcc.target/aarch64/test_frame_5.c: Likewise.
+	* gcc.target/aarch64/test_frame_6.c: Likewise.
+	* gcc.target/aarch64/test_frame_7.c: Likewise.
+	* gcc.target/aarch64/test_frame_8.c: Likewise.
+	* gcc.target/aarch64/test_frame_9.c: Likewise.
+	* gcc.target/aarch64/test_frame_10.c: Likewise.
+	* gcc.target/aarch64/test_frame_11.c: Likewise.
+	* gcc.target/aarch64/test_frame_12.c: Likewise.
+	* gcc.target/aarch64/test_frame_13.c: Likewise.
+	* gcc.target/aarch64/test_frame_14.c: Likewise.
+	* gcc.target/aarch64/test_frame_15.c: Likewise.
+
+2014-08-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r212023, r212024.
+	2014-06-26  Vidya Praveen  <vidyapraveen@arm.com>
+
+	* gcc.dg/inline-22.c: Add bind_pic_locally.
+	* gcc.dg/inline_4.c: Ditto.
+	* gcc.dg/fail_always_inline.c: Ditto.
+	* g++.dg/ipa/devirt-25.C: Ditto.
+
+	2014-06-26  Vidya Praveen  <vidyapraveen@arm.com>
+
+	* lib/target-support.exp (bind_pic_locally): Save the flags to
+	'flags_to_postpone' instead of appending to 'flags'.
+	* lib/gcc.exp (gcc_target_compile): Append board_info's multilib_flags
+	with flags_to_postpone and revert after target_compile.
+	* lib/g++.exp (g++_target_compile): Ditto.
+	* lib/gfortran.exp (gfortran_target_compile): Ditto.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211887.
+	2014-06-23  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.target/aarch64/scalar_shift_1.c: Fix expected assembler.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211441.
+	2014-06-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/acle/acle.exp: New.
+	* gcc.target/aarch64/acle/crc32b.c: New test.
+	* gcc.target/aarch64/acle/crc32cb.c: Likewise.
+	* gcc.target/aarch64/acle/crc32cd.c: Likewise.
+	* gcc.target/aarch64/acle/crc32ch.c: Likewise.
+	* gcc.target/aarch64/acle/crc32cw.c: Likewise.
+	* gcc.target/aarch64/acle/crc32d.c: Likewise.
+	* gcc.target/aarch64/acle/crc32h.c: Likewise.
+	* gcc.target/aarch64/acle/crc32w.c: Likewise.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210153.
+	2014-05-07  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/vrev16p8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev16p8.x: New file.
+	* gcc.target/aarch64/simd/vrev16qp8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev16qp8.x: New file.
+	* gcc.target/aarch64/simd/vrev16qs8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev16qs8.x: New file.
+	* gcc.target/aarch64/simd/vrev16qu8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev16qu8.x: New file.
+	* gcc.target/aarch64/simd/vrev16s8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev16s8.x: New file.
+	* gcc.target/aarch64/simd/vrev16u8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev16u8.x: New file.
+	* gcc.target/aarch64/simd/vrev32p16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32p16.x: New file.
+	* gcc.target/aarch64/simd/vrev32p8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32p8.x: New file.
+	* gcc.target/aarch64/simd/vrev32qp16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32qp16.x: New file.
+	* gcc.target/aarch64/simd/vrev32qp8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32qp8.x: New file.
+	* gcc.target/aarch64/simd/vrev32qs16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32qs16.x: New file.
+	* gcc.target/aarch64/simd/vrev32qs8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32qs8.x: New file.
+	* gcc.target/aarch64/simd/vrev32qu16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32qu16.x: New file.
+	* gcc.target/aarch64/simd/vrev32qu8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32qu8.x: New file.
+	* gcc.target/aarch64/simd/vrev32s16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32s16.x: New file.
+	* gcc.target/aarch64/simd/vrev32s8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32s8.x: New file.
+	* gcc.target/aarch64/simd/vrev32u16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32u16.x: New file.
+	* gcc.target/aarch64/simd/vrev32u8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev32u8.x: New file.
+	* gcc.target/aarch64/simd/vrev64f32_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64f32.x: New file.
+	* gcc.target/aarch64/simd/vrev64p16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64p16.x: New file.
+	* gcc.target/aarch64/simd/vrev64p8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64p8.x: New file.
+	* gcc.target/aarch64/simd/vrev64qf32_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qf32.x: New file.
+	* gcc.target/aarch64/simd/vrev64qp16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qp16.x: New file.
+	* gcc.target/aarch64/simd/vrev64qp8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qp8.x: New file.
+	* gcc.target/aarch64/simd/vrev64qs16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qs16.x: New file.
+	* gcc.target/aarch64/simd/vrev64qs32_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qs32.x: New file.
+	* gcc.target/aarch64/simd/vrev64qs8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qs8.x: New file.
+	* gcc.target/aarch64/simd/vrev64qu16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qu16.x: New file.
+	* gcc.target/aarch64/simd/vrev64qu32_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qu32.x: New file.
+	* gcc.target/aarch64/simd/vrev64qu8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64qu8.x: New file.
+	* gcc.target/aarch64/simd/vrev64s16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64s16.x: New file.
+	* gcc.target/aarch64/simd/vrev64s32_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64s32.x: New file.
+	* gcc.target/aarch64/simd/vrev64s8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64s8.x: New file.
+	* gcc.target/aarch64/simd/vrev64u16_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64u16.x: New file.
+	* gcc.target/aarch64/simd/vrev64u32_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64u32.x: New file.
+	* gcc.target/aarch64/simd/vrev64u8_1.c: New file.
+	* gcc.target/aarch64/simd/vrev64u8.x: New file.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210148, r210151, r210422.
+	2014-05-14  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/arm/simd/vtrnqf32_1.c: New file.
+	* gcc.target/arm/simd/vtrnqp16_1.c: New file.
+	* gcc.target/arm/simd/vtrnqp8_1.c: New file.
+	* gcc.target/arm/simd/vtrnqs16_1.c: New file.
+	* gcc.target/arm/simd/vtrnqs32_1.c: New file.
+	* gcc.target/arm/simd/vtrnqs8_1.c: New file.
+	* gcc.target/arm/simd/vtrnqu16_1.c: New file.
+	* gcc.target/arm/simd/vtrnqu32_1.c: New file.
+	* gcc.target/arm/simd/vtrnqu8_1.c: New file.
+	* gcc.target/arm/simd/vtrnf32_1.c: New file.
+	* gcc.target/arm/simd/vtrnp16_1.c: New file.
+	* gcc.target/arm/simd/vtrnp8_1.c: New file.
+	* gcc.target/arm/simd/vtrns16_1.c: New file.
+	* gcc.target/arm/simd/vtrns32_1.c: New file.
+	* gcc.target/arm/simd/vtrns8_1.c: New file.
+	* gcc.target/arm/simd/vtrnu16_1.c: New file.
+	* gcc.target/arm/simd/vtrnu32_1.c: New file.
+	* gcc.target/arm/simd/vtrnu8_1.c: New file.
+
+	2014-05-07  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vtrns32.c: Expect zip[12] insn rather than trn[12].
+	* gcc.target/aarch64/vtrnu32.c: Likewise.
+	* gcc.target/aarch64/vtrnf32.c: Likewise.
+
+	2014-05-07  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/vtrnf32_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnf32.x: New file.
+	* gcc.target/aarch64/simd/vtrnp16_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnp16.x: New file.
+	* gcc.target/aarch64/simd/vtrnp8_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnp8.x: New file.
+	* gcc.target/aarch64/simd/vtrnqf32_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqf32.x: New file.
+	* gcc.target/aarch64/simd/vtrnqp16_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqp16.x: New file.
+	* gcc.target/aarch64/simd/vtrnqp8_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqp8.x: New file.
+	* gcc.target/aarch64/simd/vtrnqs16_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqs16.x: New file.
+	* gcc.target/aarch64/simd/vtrnqs32_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqs32.x: New file.
+	* gcc.target/aarch64/simd/vtrnqs8_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqs8.x: New file.
+	* gcc.target/aarch64/simd/vtrnqu16_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqu16.x: New file.
+	* gcc.target/aarch64/simd/vtrnqu32_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqu32.x: New file.
+	* gcc.target/aarch64/simd/vtrnqu8_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnqu8.x: New file.
+	* gcc.target/aarch64/simd/vtrns16_1.c: New file.
+	* gcc.target/aarch64/simd/vtrns16.x: New file.
+	* gcc.target/aarch64/simd/vtrns32_1.c: New file.
+	* gcc.target/aarch64/simd/vtrns32.x: New file.
+	* gcc.target/aarch64/simd/vtrns8_1.c: New file.
+	* gcc.target/aarch64/simd/vtrns8.x: New file.
+	* gcc.target/aarch64/simd/vtrnu16_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnu16.x: New file.
+	* gcc.target/aarch64/simd/vtrnu32_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnu32.x: New file.
+	* gcc.target/aarch64/simd/vtrnu8_1.c: New file.
+	* gcc.target/aarch64/simd/vtrnu8.x: New file.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209794, 209858.
+	2014-04-25  Marek Polacek  <polacek@redhat.com>
+
+	PR c/60114
+	* gcc.dg/pr60114.c: New test.
+
+	2014-04-28  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	PR c/60983
+	* gcc.dg/pr60114.c: Use signed chars.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210861.
+	2014-05-23  Jiong Wang   <jiong.wang@arm.com>
+
+	* gcc.target/aarch64/tail_indirect_call_1.c: New.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211314.
+	2014-06-06  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.dg/tree-ssa/pr42585.c: Skip for AArch64.
+	* gcc.dg/tree-ssa/sra-12.c: Likewise.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210967.
+	2014-05-27  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* lib/target-supports.exp (check_effective_target_vect_bswap):
+	Specify arm*-*-* support.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r210152, 211059.
+	2014-05-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/arm/simd/vextQf32_1.c: New file.
+	* gcc.target/arm/simd/vextQp16_1.c: New file.
+	* gcc.target/arm/simd/vextQp8_1.c: New file.
+	* gcc.target/arm/simd/vextQs16_1.c: New file.
+	* gcc.target/arm/simd/vextQs32_1.c: New file.
+	* gcc.target/arm/simd/vextQs64_1.c: New file.
+	* gcc.target/arm/simd/vextQs8_1.c: New file.
+	* gcc.target/arm/simd/vextQu16_1.c: New file.
+	* gcc.target/arm/simd/vextQu32_1.c: New file.
+	* gcc.target/arm/simd/vextQu64_1.c: New file.
+	* gcc.target/arm/simd/vextQu8_1.c: New file.
+	* gcc.target/arm/simd/vextQp64_1.c: New file.
+	* gcc.target/arm/simd/vextf32_1.c: New file.
+	* gcc.target/arm/simd/vextp16_1.c: New file.
+	* gcc.target/arm/simd/vextp8_1.c: New file.
+	* gcc.target/arm/simd/vexts16_1.c: New file.
+	* gcc.target/arm/simd/vexts32_1.c: New file.
+	* gcc.target/arm/simd/vexts64_1.c: New file.
+	* gcc.target/arm/simd/vexts8_1.c: New file.
+	* gcc.target/arm/simd/vextu16_1.c: New file.
+	* gcc.target/arm/simd/vextu32_1.c: New file.
+	* gcc.target/arm/simd/vextu64_1.c: New file.
+	* gcc.target/arm/simd/vextu8_1.c: New file.
+	* gcc.target/arm/simd/vextp64_1.c: New file.
+
+	2014-05-07  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/ext_f32.x: New file.
+	* gcc.target/aarch64/simd/ext_f32_1.c: New file.
+	* gcc.target/aarch64/simd/ext_p16.x: New file.
+	* gcc.target/aarch64/simd/ext_p16_1.c: New file.
+	* gcc.target/aarch64/simd/ext_p8.x: New file.
+	* gcc.target/aarch64/simd/ext_p8_1.c: New file.
+	* gcc.target/aarch64/simd/ext_s16.x: New file.
+	* gcc.target/aarch64/simd/ext_s16_1.c: New file.
+	* gcc.target/aarch64/simd/ext_s32.x: New file.
+	* gcc.target/aarch64/simd/ext_s32_1.c: New file.
+	* gcc.target/aarch64/simd/ext_s64.x: New file.
+	* gcc.target/aarch64/simd/ext_s64_1.c: New file.
+	* gcc.target/aarch64/simd/ext_s8.x: New file.
+	* gcc.target/aarch64/simd/ext_s8_1.c: New file.
+	* gcc.target/aarch64/simd/ext_u16.x: New file.
+	* gcc.target/aarch64/simd/ext_u16_1.c: New file.
+	* gcc.target/aarch64/simd/ext_u32.x: New file.
+	* gcc.target/aarch64/simd/ext_u32_1.c: New file.
+	* gcc.target/aarch64/simd/ext_u64.x: New file.
+	* gcc.target/aarch64/simd/ext_u64_1.c: New file.
+	* gcc.target/aarch64/simd/ext_u8.x: New file.
+	* gcc.target/aarch64/simd/ext_u8_1.c: New file.
+	* gcc.target/aarch64/simd/ext_f64.c: New file.
+	* gcc.target/aarch64/simd/extq_f32.x: New file.
+	* gcc.target/aarch64/simd/extq_f32_1.c: New file.
+	* gcc.target/aarch64/simd/extq_p16.x: New file.
+	* gcc.target/aarch64/simd/extq_p16_1.c: New file.
+	* gcc.target/aarch64/simd/extq_p8.x: New file.
+	* gcc.target/aarch64/simd/extq_p8_1.c: New file.
+	* gcc.target/aarch64/simd/extq_s16.x: New file.
+	* gcc.target/aarch64/simd/extq_s16_1.c: New file.
+	* gcc.target/aarch64/simd/extq_s32.x: New file.
+	* gcc.target/aarch64/simd/extq_s32_1.c: New file.
+	* gcc.target/aarch64/simd/extq_s64.x: New file.
+	* gcc.target/aarch64/simd/extq_s64_1.c: New file.
+	* gcc.target/aarch64/simd/extq_s8.x: New file.
+	* gcc.target/aarch64/simd/extq_s8_1.c: New file.
+	* gcc.target/aarch64/simd/extq_u16.x: New file.
+	* gcc.target/aarch64/simd/extq_u16_1.c: New file.
+	* gcc.target/aarch64/simd/extq_u32.x: New file.
+
+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209940, r209943, r209947.
+	2014-04-30  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/arm/simd/vuzpqf32_1.c: New file.
+	* gcc.target/arm/simd/vuzpqp16_1.c: New file.
+	* gcc.target/arm/simd/vuzpqp8_1.c: New file.
+	* gcc.target/arm/simd/vuzpqs16_1.c: New file.
+	* gcc.target/arm/simd/vuzpqs32_1.c: New file.
+	* gcc.target/arm/simd/vuzpqs8_1.c: New file.
+	* gcc.target/arm/simd/vuzpqu16_1.c: New file.
+	* gcc.target/arm/simd/vuzpqu32_1.c: New file.
+	* gcc.target/arm/simd/vuzpqu8_1.c: New file.
+	* gcc.target/arm/simd/vuzpf32_1.c: New file.
+	* gcc.target/arm/simd/vuzpp16_1.c: New file.
+	* gcc.target/arm/simd/vuzpp8_1.c: New file.
+	* gcc.target/arm/simd/vuzps16_1.c: New file.
+	* gcc.target/arm/simd/vuzps32_1.c: New file.
+	* gcc.target/arm/simd/vuzps8_1.c: New file.
+	* gcc.target/arm/simd/vuzpu16_1.c: New file.
+	* gcc.target/arm/simd/vuzpu32_1.c: New file.
+	* gcc.target/arm/simd/vuzpu8_1.c: New file.
+
+	2014-04-30  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2.
+	* gcc.target/aarch64/vuzpu32_1.c: Likewise.
+	* gcc.target/aarch64/vuzpf32_1.c: Likewise.
+
+	2014-04-30  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/vuzpf32_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpf32.x: New file.
+	* gcc.target/aarch64/simd/vuzpp16_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpp16.x: New file.
+	* gcc.target/aarch64/simd/vuzpp8_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpp8.x: New file.
+	* gcc.target/aarch64/simd/vuzpqf32_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqf32.x: New file.
+	* gcc.target/aarch64/simd/vuzpqp16_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqp16.x: New file.
+	* gcc.target/aarch64/simd/vuzpqp8_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqp8.x: New file.
+	* gcc.target/aarch64/simd/vuzpqs16_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqs16.x: New file.
+	* gcc.target/aarch64/simd/vuzpqs32_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqs32.x: New file.
+	* gcc.target/aarch64/simd/vuzpqs8_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqs8.x: New file.
+	* gcc.target/aarch64/simd/vuzpqu16_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqu16.x: New file.
+	* gcc.target/aarch64/simd/vuzpqu32_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqu32.x: New file.
+	* gcc.target/aarch64/simd/vuzpqu8_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpqu8.x: New file.
+	* gcc.target/aarch64/simd/vuzps16_1.c: New file.
+	* gcc.target/aarch64/simd/vuzps16.x: New file.
+	* gcc.target/aarch64/simd/vuzps32_1.c: New file.
+	* gcc.target/aarch64/simd/vuzps32.x: New file.
+	* gcc.target/aarch64/simd/vuzps8_1.c: New file.
+	* gcc.target/aarch64/simd/vuzps8.x: New file.
+	* gcc.target/aarch64/simd/vuzpu16_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpu16.x: New file.
+	* gcc.target/aarch64/simd/vuzpu32_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpu32.x: New file.
+	* gcc.target/aarch64/simd/vuzpu8_1.c: New file.
+	* gcc.target/aarch64/simd/vuzpu8.x: New file.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r211206.
+	2014-06-03  Andrew Pinski  <apinski@cavium.com>
+
+	* gcc.c-torture/compile/20140528-1.c: New testcase.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209908.
+	2013-04-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/arm/simd/simd.exp: New file.
+	* gcc.target/arm/simd/vzipqf32_1.c: New file.
+	* gcc.target/arm/simd/vzipqp16_1.c: New file.
+	* gcc.target/arm/simd/vzipqp8_1.c: New file.
+	* gcc.target/arm/simd/vzipqs16_1.c: New file.
+	* gcc.target/arm/simd/vzipqs32_1.c: New file.
+	* gcc.target/arm/simd/vzipqs8_1.c: New file.
+	* gcc.target/arm/simd/vzipqu16_1.c: New file.
+	* gcc.target/arm/simd/vzipqu32_1.c: New file.
+	* gcc.target/arm/simd/vzipqu8_1.c: New file.
+	* gcc.target/arm/simd/vzipf32_1.c: New file.
+	* gcc.target/arm/simd/vzipp16_1.c: New file.
+	* gcc.target/arm/simd/vzipp8_1.c: New file.
+	* gcc.target/arm/simd/vzips16_1.c: New file.
+	* gcc.target/arm/simd/vzips32_1.c: New file.
+	* gcc.target/arm/simd/vzips8_1.c: New file.
+	* gcc.target/arm/simd/vzipu16_1.c: New file.
+	* gcc.target/arm/simd/vzipu32_1.c: New file.
+	* gcc.target/arm/simd/vzipu8_1.c: New file.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209893.
+	2014-04-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* gcc.target/aarch64/simd/simd.exp: New file.
+	* gcc.target/aarch64/simd/vzipf32_1.c: New file.
+	* gcc.target/aarch64/simd/vzipf32.x: New file.
+	* gcc.target/aarch64/simd/vzipp16_1.c: New file.
+	* gcc.target/aarch64/simd/vzipp16.x: New file.
+	* gcc.target/aarch64/simd/vzipp8_1.c: New file.
+	* gcc.target/aarch64/simd/vzipp8.x: New file.
+	* gcc.target/aarch64/simd/vzipqf32_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqf32.x: New file.
+	* gcc.target/aarch64/simd/vzipqp16_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqp16.x: New file.
+	* gcc.target/aarch64/simd/vzipqp8_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqp8.x: New file.
+	* gcc.target/aarch64/simd/vzipqs16_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqs16.x: New file.
+	* gcc.target/aarch64/simd/vzipqs32_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqs32.x: New file.
+	* gcc.target/aarch64/simd/vzipqs8_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqs8.x: New file.
+	* gcc.target/aarch64/simd/vzipqu16_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqu16.x: New file.
+	* gcc.target/aarch64/simd/vzipqu32_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqu32.x: New file.
+	* gcc.target/aarch64/simd/vzipqu8_1.c: New file.
+	* gcc.target/aarch64/simd/vzipqu8.x: New file.
+	* gcc.target/aarch64/simd/vzips16_1.c: New file.
+	* gcc.target/aarch64/simd/vzips16.x: New file.
+	* gcc.target/aarch64/simd/vzips32_1.c: New file.
+	* gcc.target/aarch64/simd/vzips32.x: New file.
+	* gcc.target/aarch64/simd/vzips8_1.c: New file.
+	* gcc.target/aarch64/simd/vzips8.x: New file.
+	* gcc.target/aarch64/simd/vzipu16_1.c: New file.
+	* gcc.target/aarch64/simd/vzipu16.x: New file.
+	* gcc.target/aarch64/simd/vzipu32_1.c: New file.
+	* gcc.target/aarch64/simd/vzipu32.x: New file.
+	* gcc.target/aarch64/simd/vzipu8_1.c: New file.
+	* gcc.target/aarch64/simd/vzipu8.x: New file.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209808.
+	2014-04-25  Jiong Wang  <jiong.wang@arm.com>
+
+	* gcc.target/arm/tail-long-call.c: New test.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209749.
+	2014-04-24  Alan Lawrence  <alan.lawrence@arm.com>
+
+	* lib/target-supports.exp (check_effective_target_vect_perm): Return
+	true for aarch64_be.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209736.
+	2014-04-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* lib/target-supports.exp (check_effective_target_vect_bswap): New.
+	* gcc.dg/vect/vect-bswap16: New test.
+	* gcc.dg/vect/vect-bswap32: Likewise.
+	* gcc.dg/vect/vect-bswap64: Likewise.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209713.
+	2014-04-23  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.target/aarch64/vdup_lane_1.c: New testcase.
+	* gcc.target/aarch64/vdup_lane_2.c: New testcase.
+	* gcc.target/aarch64/vdup_n_1.c: New testcase.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209704, 209705.
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/arm/rev16.c: New test.
+
+	2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/rev16_1.c: New test.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209642.
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.target/aarch64/vreinterpret_f64_1.c: New.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209640.
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.target/aarch64/vqneg_s64_1.c: New testcase.
+	* gcc.target/aarch64/vqabs_s64_1.c: New testcase.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209613, 209614.
+	2014-04-22  Ian Bolton  <ian.bolton@arm.com>
+
+	* gcc.target/arm/anddi_notdi-1.c: New test.
+	* gcc.target/arm/iordi_notdi-1.c: New test case.
+
+	2014-04-22  Ian Bolton  <ian.bolton@arm.com>
+
+	* gcc.target/arm/iordi_notdi-1.c: New test.
+
+2014-05-23  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209559.
+	2014-04-22  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.target/aarch64/vrnd_f64_1.c : New file.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-05-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209889.
+	2014-04-29  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
+
+	* gcc.target/aarch64/fcsel_1.c: New test case.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/testsuite/gcc.c-torture/execute/pr63843.c
+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr63843.c
@@ -0,0 +1,31 @@
+/* PR rtl-optimization/63843 */
+
+static inline __attribute__ ((always_inline))
+unsigned short foo (unsigned short v)
+{
+  return (v << 8) | (v >> 8);
+}
+
+unsigned short __attribute__ ((noinline, noclone, hot))
+bar (unsigned char *x)
+{
+  unsigned int a;
+  unsigned short b;
+  __builtin_memcpy (&a, &x[0], sizeof (a));
+  a ^= 0x80808080U;
+  __builtin_memcpy (&x[0], &a, sizeof (a));
+  __builtin_memcpy (&b, &x[2], sizeof (b));
+  return foo (b);
+}
+
+int
+main ()
+{
+  unsigned char x[8] = { 0x01, 0x01, 0x01, 0x01 };
+  if (__CHAR_BIT__ == 8
+      && sizeof (short) == 2
+      && sizeof (int) == 4
+      && bar (x) != 0x8181U)
+    __builtin_abort ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.c-torture/compile/pr65735.c
+++ b/src/gcc/testsuite/gcc.c-torture/compile/pr65735.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/65735 */
+
+int foo (void);
+
+void
+bar (int a, int b, int c)
+{
+  while (!a)
+    {
+      c = foo ();
+      if (c == 7)
+	c = b;
+      switch (c)
+	{
+	case 1:
+	  a = b++;
+	  if (b)
+	    b = 1;
+	}
+    }
+}
--- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c
+++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c
@@ -0,0 +1,9 @@
+unsigned f(unsigned flags, unsigned capabilities)
+{
+  unsigned gfp_mask;
+  unsigned gfp_notmask = 0;
+  gfp_mask = flags & ((1 << 25) - 1);
+  if (!(capabilities & 0x00000001))
+    gfp_mask |= 0x1000000u;
+  return (gfp_mask & ~gfp_notmask);
+}
--- a/src/gcc/testsuite/gnat.dg/opt46.adb
+++ b/src/gcc/testsuite/gnat.dg/opt46.adb
@@ -0,0 +1,45 @@
+-- { dg-do compile }
+-- { dg-options "-O" }
+
+with Ada.Unchecked_Deallocation;
+
+with Opt46_Pkg;
+
+package body Opt46 is
+
+   type Pattern is abstract tagged null record;
+
+   type Pattern_Access is access Pattern'Class;
+
+   procedure Free is new Ada.Unchecked_Deallocation
+     (Pattern'Class, Pattern_Access);
+
+   type Action is abstract tagged null record;
+
+   type Action_Access is access Action'Class;
+
+   procedure Free is new Ada.Unchecked_Deallocation
+     (Action'Class, Action_Access);
+
+   type Pattern_Action is record
+      Pattern : Pattern_Access;
+      Action  : Action_Access;
+   end record;
+
+   package Pattern_Action_Table is new Opt46_Pkg (Pattern_Action, Natural, 1);
+
+   type Session_Data is record
+      Filters : Pattern_Action_Table.Instance;
+   end record;
+
+   procedure Close (Session : Session_Type) is
+      Filters : Pattern_Action_Table.Instance renames Session.Data.Filters;
+   begin
+      for F in 1 .. Pattern_Action_Table.Last (Filters) loop
+         Free (Filters.Table (F).Pattern);
+         Free (Filters.Table (F).Action);
+      end loop;
+
+   end Close;
+
+end Opt46;
--- a/src/gcc/testsuite/gnat.dg/opt46.ads
+++ b/src/gcc/testsuite/gnat.dg/opt46.ads
@@ -0,0 +1,16 @@
+package Opt46 is
+
+   type Session_Type is limited private;
+
+   procedure Close (Session : Session_Type);
+
+private
+
+   type Session_Data;
+   type Session_Data_Access is access Session_Data;
+
+   type Session_Type is record
+      Data : Session_Data_Access;
+   end record;
+
+end Opt46;
--- a/src/gcc/testsuite/gnat.dg/opt46_pkg.adb
+++ b/src/gcc/testsuite/gnat.dg/opt46_pkg.adb
@@ -0,0 +1,8 @@
+package body Opt46_Pkg is
+
+   function Last (T : Instance) return Table_Index_Type is
+   begin
+      return Table_Index_Type (T.P.Last_Val);
+   end Last;
+
+end Opt46_Pkg;
--- a/src/gcc/testsuite/gnat.dg/opt46_pkg.ads
+++ b/src/gcc/testsuite/gnat.dg/opt46_pkg.ads
@@ -0,0 +1,31 @@
+generic
+   type Table_Component_Type is private;
+   type Table_Index_Type     is range <>;
+
+   Table_Low_Bound : Table_Index_Type;
+
+package Opt46_Pkg is
+
+   type Table_Type is
+     array (Table_Index_Type range <>) of Table_Component_Type;
+   subtype Big_Table_Type is
+     Table_Type (Table_Low_Bound .. Table_Index_Type'Last);
+
+   type Table_Ptr is access all Big_Table_Type;
+
+   type Table_Private is private;
+
+   type Instance is record
+      Table : aliased Table_Ptr := null;
+      P : Table_Private;
+   end record;
+
+   function Last (T : Instance) return Table_Index_Type;
+
+private
+
+   type Table_Private is record
+      Last_Val : Integer;
+   end record;
+
+end Opt46_Pkg;
--- a/src/gcc/testsuite/gcc.dg/asr_div1.c
+++ b/src/gcc/testsuite/gcc.dg/asr_div1.c
@@ -0,0 +1,56 @@
+/* Test division by const int generates only one shift.  */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
+
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+static NOINLINE int
+f1 (int n)
+{
+  return n / 33;
+}
+
+static NOINLINE int
+f2 (int n)
+{
+  return n / 77;
+}
+
+int
+main ()
+{
+  int a = 0xaaaaaaaa;
+  int b = 0x55555555;
+  int c;
+  c = f1 (a);
+  if (c != 0xfd6a052c)
+    abort ();
+  c = f1 (b);
+  if (c != 0x295FAD4)
+    abort ();
+  c = f2 (a);
+  if (c != 0xfee44b5c)
+    abort ();
+  c = f2 (b);
+  if (c != 0x11bb4a4)
+    abort ();
+  return 0;
+}
+
+/* Following replacement pattern of intger division by constant, GCC is expected
+   to generate MULT and (x)SHIFTRT.  This test checks that considering division
+   by const 33, gcc generates a single ASHIFTRT by 35, instead of two - LSHIFTRT
+   by 32 and ASHIFTRT by 3.  */
+
+/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(ashiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(const_int 35 " "combine" { target aarch64*-*-* } } } */
+
+/* Similarly, considering division by const 77, gcc generates a single ASHIFTRT
+   by 36, instead of two - LSHIFTRT by 32 and ASHIFTRT by 4.  */
+
+/* { dg-final { scan-rtl-dump "\\(const_int 36 " "combine" { target aarch64*-*-* } } } */
+
+/* { dg-final { cleanup-rtl-dump "combine" } } */
--- a/src/gcc/testsuite/gcc.dg/fail_always_inline.c
+++ b/src/gcc/testsuite/gcc.dg/fail_always_inline.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-add-options bind_pic_locally } */
 
 extern __attribute__ ((always_inline)) void
  bar() { } /* { dg-warning "function might not be inlinable" } */
--- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c
+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */
+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */
 /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue"  } */
 
 long __attribute__((noinline, noclone))
--- a/src/gcc/testsuite/gcc.dg/pr10474.c
+++ b/src/gcc/testsuite/gcc.dg/pr10474.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */
+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */
 /* { dg-options "-O3 -fdump-rtl-pro_and_epilogue"  } */
 
 void f(int *i)
--- a/src/gcc/testsuite/gcc.dg/ssp-4.c
+++ b/src/gcc/testsuite/gcc.dg/ssp-4.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */
+/* { dg-require-effective-target fstack_protector } */
+
+typedef unsigned int uint32_t;
+struct ctx
+{
+  uint32_t A;
+};
+
+void *
+buffer_copy (const struct ctx *ctx, void *resbuf)
+{
+  uint32_t buffer[4];
+  buffer[0] = (ctx->A);
+  __builtin_memcpy (resbuf, buffer, sizeof (buffer));
+  return resbuf;
+}
--- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c
+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */
+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */
 /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue"  } */
 
 long __attribute__((noinline, noclone))
--- a/src/gcc/testsuite/gcc.dg/pr64935-1.c
+++ b/src/gcc/testsuite/gcc.dg/pr64935-1.c
@@ -0,0 +1,55 @@
+/* PR rtl-optimization/64935 */
+/* { dg-do compile } */
+/* { dg-options "-std=gnu89 -O2 -fcompare-debug" } */
+/* { dg-prune-output "right shift count >= width of type" } */
+
+int a[] = {}, b[] = {}, c[] = {}, d[] = {}, e[] = {}, f[] = {}, h[] = {};
+int g[] = { 0 };
+int i, l, s, w, x, y, z, t2, t3, t5;
+unsigned long j, m, o, t4;
+long k, n, p, q, r, t, u, v, t1;
+fn1 ()
+{
+  int t6;
+  for (; i; i++)
+    {
+      t5 = a[q] ^ b[p >> 1] ^ c[o >> 1 & 1] ^ d[n >> 1 & 1] ^ e[m >> 1 & 1]
+           ^ f[l >> 1 & 1] ^ g[0] ^ h[j & 1];
+      t4 = a[j] ^ b[q >> 1] ^ c[p] ^ d[o] ^ e[n] ^ f[m] ^ g[l >> 8] ^ h[k];
+      t3 = a[k >> 1] ^ b[j & 5] ^ d[p >> 32] ^ e[o >> 4] ^ f[n >> 6]
+           ^ g[m >> 8] ^ h[l];
+      t2 = a[l >> 6] ^ b[k & 1] ^ c[j >> 1] ^ d[q >> 32] ^ e[p >> 4]
+           ^ f[o >> 6] ^ g[n >> 8] ^ h[m & 1];
+      t1 = a[m >> 6] ^ b[l & 1] ^ c[k & 15] ^ d[j >> 2] ^ e[q >> 4] ^ f[p >> 6]
+           ^ g[o >> 8] ^ h[n & 1];
+      z = a[n >> 56] ^ b[m & 15] ^ c[l & 15] ^ d[k >> 2] ^ e[j >> 4]
+          ^ f[q >> 6] ^ g[p >> 8] ^ h[o & 1];
+      y = a[o >> 56] ^ b[n & 15] ^ c[m >> 40] ^ d[l >> 2] ^ e[k >> 4]
+          ^ f[j >> 6] ^ g[q >> 8] ^ h[p & 1];
+      x = a[p >> 56] ^ b[o & 15] ^ c[n >> 40] ^ d[m >> 2] ^ e[l >> 4]
+          ^ f[k >> 6] ^ g[j >> 8] ^ h[q & 1];
+      q = j = t4;
+      k = t3;
+      l = t2;
+      m = t1;
+      n = z;
+      o = y;
+      p = a[t6] ^ b[0] ^ c[w] ^ d[v] ^ e[u] ^ f[t] ^ g[s] ^ h[r];
+      t4 = a[r >> 1] ^ b[t6 & 1] ^ d[w >> 1] ^ e[v >> 1] ^ f[u >> 1]
+           ^ g[t >> 1] ^ h[s];
+      t3 = a[s >> 6] ^ b[r & 1] ^ c[t6 & 5] ^ d[0] ^ e[w >> 4] ^ f[v >> 6]
+           ^ g[u >> 8] ^ h[t & 1];
+      t2 = a[t >> 6] ^ b[s] ^ c[r & 15] ^ d[t6 >> 1] ^ e[0] ^ f[w >> 6]
+           ^ g[v >> 8] ^ h[u & 1];
+      t1 = a[u >> 6] ^ b[t & 15] ^ c[s & 5] ^ d[r >> 32] ^ e[t6 >> 4]
+           ^ g[w >> 8] ^ h[v & 1];
+      z = a[v >> 56] ^ b[u >> 48 & 1] ^ c[t >> 40 & 1] ^ d[s] ^ e[r >> 1 & 1]
+          ^ f[t6 >> 1 & 1] ^ g[0] ^ h[w & 1] ^ z;
+      t6 = t5;
+      r = t4;
+      s = 0;
+      t = u = t1;
+      v = z;
+      w = y;
+    }
+}
--- a/src/gcc/testsuite/gcc.dg/inline-22.c
+++ b/src/gcc/testsuite/gcc.dg/inline-22.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-funit-at-a-time -Wno-attributes" } */
+/* { dg-add-options bind_pic_locally } */
 /* Verify we can inline without a complete prototype and with promoted
    arguments.  See also PR32492.  */
 __attribute__((always_inline)) void f1() {}
--- a/src/gcc/testsuite/gcc.dg/pr64007.c
+++ b/src/gcc/testsuite/gcc.dg/pr64007.c
@@ -0,0 +1,50 @@
+/* { dg-options " -O3 " } */
+/* { dg-do run } */
+
+#include <assert.h>
+
+int d, i;
+
+struct S
+{
+  int f0;
+} *b, c, e, h, **g = &b;
+
+static struct S *f = &e;
+
+int
+fn1 (int p)
+{
+  int a = 0;
+  return a || p < 0 || p >= 2 || 1 >> p;
+}
+
+int
+main ()
+{
+  int k = 1, l, *m = &c.f0;
+
+  for (;;)
+    {
+      l = fn1 (i);
+      *m = k && i;
+      if (l)
+	{
+	  int n[1] = {0};
+	}
+      break;
+    }
+
+  *g = &h;
+
+  assert (b);
+
+  if (d)
+    (*m)--;
+  d = (f != 0) | (i >= 0);
+
+  if (c.f0 != 0)
+    __builtin_abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.dg/memset-2.c
+++ b/src/gcc/testsuite/gcc.dg/memset-2.c
@@ -0,0 +1,11 @@
+/* PR target/63937 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+
+void
+foo (char *p)
+{
+  p = __builtin_assume_aligned (p, 64);
+  __builtin_memset (p, 0, 0x100000001ULL);
+}
+
--- a/src/gcc/testsuite/gcc.dg/inline_4.c
+++ b/src/gcc/testsuite/gcc.dg/inline_4.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-einline=foo2 -fdisable-ipa-inline -Wno-attributes" } */
+/* { dg-add-options bind_pic_locally } */
 int g;
 __attribute__((always_inline)) void bar (void)
 {
--- a/src/gcc/testsuite/gcc.dg/pr64935-2.c
+++ b/src/gcc/testsuite/gcc.dg/pr64935-2.c
@@ -0,0 +1,14 @@
+/* PR rtl-optimization/64935 */
+/* { dg-do compile } */
+/* { dg-options "-O -fschedule-insns --param=max-sched-ready-insns=0 -fcompare-debug" } */
+
+void
+foo (int *data, unsigned len, const int qlp_coeff[],
+     unsigned order, int lp, int residual[], int i)
+{
+  int sum;
+  sum = 0;
+  sum += qlp_coeff[1] * data[i - 2];
+  sum += qlp_coeff[0] * data[i - 1];
+  residual[i] = data[i] - (sum >> lp);
+}
--- a/src/gcc/testsuite/gcc.dg/torture/pr64083.c
+++ b/src/gcc/testsuite/gcc.dg/torture/pr64083.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+int a, b;
+void
+fn1 ()
+{
+  int c = 0;
+  while (b)
+    {
+      switch (c)
+    case 1:
+	fn1 ();
+	if (a)
+	  c = 1;
+	b = 0;
+    }
+}
--- a/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c
+++ b/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-ffat-lto-objects" } */
+
+int
+f (void)
+{
+  register unsigned int r asm ("no-such-register"); /* { dg-error "invalid register name" } */
+  return r;
+}
--- a/src/gcc/testsuite/gcc.dg/torture/pr64284.c
+++ b/src/gcc/testsuite/gcc.dg/torture/pr64284.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+int *a;
+int b;
+int
+fn1() {
+    enum { QSTRING } c = 0;
+    while (1) {
+	switch (*a) {
+	  case '\'':
+	  c = 0;
+	  default:
+	  switch (c)
+	  case 0:
+	    if (b)
+	      return 0;
+	    c = 1;
+	}
+	a++;
+    }
+}
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
+/* { dg-final { scan-tree-dump-times "FSM" 6 "dom1" } } */
+/* { dg-final { cleanup-tree-dump "dom1" } } */
+
+int sum0, sum1, sum2, sum3;
+int foo (char *s, char **ret)
+{
+  int state=0;
+  char c;
+
+  for (; *s && state != 4; s++)
+    {
+      c = *s;
+      if (c == '*')
+	{
+	  s++;
+	  break;
+	}
+      switch (state)
+	{
+	case 0:
+	  if (c == '+')
+	    state = 1;
+	  else if (c != '-')
+	    sum0+=c;
+	  break;
+	case 1:
+	  if (c == '+')
+	    state = 2;
+	  else if (c == '-')
+	    state = 0;
+	  else
+	    sum1+=c;
+	  break;
+	default:
+	  break;
+	}
+
+    }
+  *ret = s;
+  return state;
+}
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-8.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-8.c
@@ -0,0 +1,440 @@
+/* PR 64878 */
+/* { dg-options "-O2" } */
+/* { dg-do run } */
+
+struct A { int a1; };
+struct B { char *b1; int b2; int b3; };
+struct C { char *c1; int c2; struct B *c3; };
+extern struct A *f1 (char *s);
+static struct A *f2 (struct C *x);
+__attribute__ ((noinline, noclone)) int f3 (struct A *x, struct A *z) { asm volatile ("" : : "g" (x), "g" (z) : "memory"); return 0; }
+__attribute__ ((noinline, noclone)) void f4 (struct A *x, char *y, struct A *z) { asm volatile ("" : : "g" (x), "g" (z), "g" (y) : "memory"); }
+__attribute__ ((noinline, noclone)) struct B *f5 (void) { static char b[32]; static struct B f3 = { b, 0, 32 }; return &f3; }
+__attribute__ ((noinline, noclone)) int f6 (struct B *p, char *w, int z) { asm volatile ("" : : "g" (p), "g" (w), "g" (z) : "memory"); return 0; }
+__attribute__ ((noinline, noclone)) void f7 (struct B *p) { asm volatile ("" : : "g" (p) : "memory"); }
+__attribute__ ((noinline, noclone)) void f8 (struct B *p) { asm volatile ("" : : "g" (p) : "memory"); }
+__attribute__ ((noinline, noclone)) void f9 (struct A *x) { asm volatile ("" : : "g" (x) : "memory"); }
+__attribute__ ((noinline, noclone)) struct A *f10 (void) { static struct A j; asm volatile ("" : :  : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f11 (void) { static struct A j; asm volatile ("" : :  : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f12 (int b) { static struct A j; asm volatile ("" : : "g" (b) : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f13 (int i) { static struct A j; asm volatile ("" : : "g" (i) : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f14 (double d) { static struct A j; asm volatile ("" : : "g" (&d) : "memory"); return &j; }
+__attribute__ ((noinline, noclone)) struct A *f15 (char *s) { static struct A j; asm volatile ("" : : "g" (s) : "memory"); return &j; }
+char *t = "0123456789abcdef";
+char *u = "0123456789.+-e";
+
+__attribute__ ((noinline, noclone)) struct A *
+f1 (char *s)
+{
+  struct C f;
+  struct A *o;
+  f.c1 = s;
+  f.c2 = 0;
+  f.c3 = f5 ();
+  o = f2 (&f);
+  f8 (f.c3);
+  return o;
+}
+
+static struct A *
+f2 (struct C *x)
+{
+  int a, b, e = 0;
+  struct A *f = 0, *o;
+  char *g = 0;
+  char h = '\0';
+  int i = 0, j = 0;
+  a = 0;
+  b = 1;
+  char c;
+  do
+    {
+      c = x->c1[x->c2];
+      switch (a)
+	{
+	case 0:
+	  if (c == ' ')
+	    x->c2++;
+	  else if (c == '/')
+	    {
+	      a = 4;
+	      j = x->c2++;
+	    }
+	  else
+	    a = b;
+	  break;
+	case 1:
+	  switch (c)
+	    {
+	    case '{':
+	      a = 0;
+	      b = 15;
+	      f = f10 ();
+	      x->c2++;
+	      break;
+	    case '[':
+	      a = 0;
+	      b = 13;
+	      f = f11 ();
+	      x->c2++;
+	      break;
+	    case 'N':
+	    case 'n':
+	      a = 3;
+	      j = x->c2++;
+	      break;
+	    case '"':
+	    case '\'':
+	      h = c;
+	      f7 (x->c3);
+	      a = 8;
+	      j = ++x->c2;
+	      break;
+	    case 'T':
+	    case 't':
+	    case 'F':
+	    case 'f':
+	      a = 11;
+	      j = x->c2++;
+	      break;
+	    case '0' ... '9':
+	    case '-':
+	      i = 0;
+	      a = 12;
+	      j = x->c2++;
+	      break;
+	    default:
+	      e = 1;
+	      goto out;
+	    }
+	  break;
+	case 2:
+	  goto out;
+	case 3:
+	  if (__builtin_strncmp ("null", x->c1 + j, x->c2 - j))
+	    {
+	      e = 2;
+	      goto out;
+	    }
+	  if (x->c2 - j == 4)
+	    {
+	      f = 0;
+	      b = 2;
+	      a = 0;
+	    }
+	  else
+	    x->c2++;
+	  break;
+	case 4:
+	  if (c == '*')
+	    a = 5;
+	  else if (c == '/')
+	    a = 6;
+	  else
+	    {
+	      e = 8;
+	      goto out;
+	    }
+	  x->c2++;
+	  break;
+	case 5:
+	  if (c == '*')
+	    a = 7;
+	  x->c2++;
+	  break;
+	case 6:
+	  if (c == '\n')
+	    a = 0;
+	  x->c2++;
+	  break;
+	case 7:
+	  if (c == '/')
+	    a = 0;
+	  else
+	    a = 5;
+	  x->c2++;
+	  break;
+	case 8:
+	  if (c == h)
+	    {
+	      f6 (x->c3, x->c1 + j, x->c2 - j);
+	      f = f15 (x->c3->b1);
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == '\\')
+	    {
+	      b = 8;
+	      a = 9;
+	    }
+	  x->c2++;
+	  break;
+	case 9:
+	  switch (c)
+	    {
+	    case '"':
+	    case '\\':
+	      f6 (x->c3, x->c1 + j, x->c2 - j - 1);
+	      j = x->c2++;
+	      a = b;
+	      break;
+	    case 'b':
+	    case 'n':
+	    case 'r':
+	    case 't':
+	      f6 (x->c3, x->c1 + j, x->c2 - j - 1);
+	      if (c == 'b')
+		f6 (x->c3, "\b", 1);
+	      else if (c == 'n')
+		f6 (x->c3, "\n", 1);
+	      else if (c == 'r')
+		f6 (x->c3, "\r", 1);
+	      else if (c == 't')
+		f6 (x->c3, "\t", 1);
+	      j = ++x->c2;
+	      a = b;
+	      break;
+	    case 'u':
+	      f6 (x->c3, x->c1 + j, x->c2 - j - 1);
+	      j = ++x->c2;
+	      a = 10;
+	      break;
+	    default:
+	      e = 7;
+	      goto out;
+	    }
+	  break;
+	case 10:
+	  if (__builtin_strchr (t, c))
+	    {
+	      x->c2++;
+	      if (x->c2 - j == 4)
+		{
+		  unsigned char w[3];
+		  unsigned int s =
+		    (((x->c1[j] <= '9') ? x->c1[j] - '0' : (x->c1[j] & 7) + 9) << 12)
+		    + (((x->c1[j + 1] <= '9') ? x->c1[j + 1] - '0' : (x->c1[j + 1] & 7) + 9) << 8)
+		    + (((x->c1[j + 2] <= '9') ? x->c1[j + 2] - '0' : (x->c1[j + 2] & 7) + 9) << 4)
+		    + ((x->c1[j + 3] <= '9') ? x->c1[j + 3] - '0' : (x->c1[j + 3] & 7) + 9);
+		  if (s < 0x80)
+		    {
+		      w[0] = s;
+		      f6 (x->c3, (char *) w, 1);
+		    }
+		  else if (s < 0x800)
+		    {
+		      w[0] = 0xc0 | (s >> 6);
+		      w[1] = 0x80 | (s & 0x3f);
+		      f6 (x->c3, (char *) w, 2);
+		    }
+		  else
+		    {
+		      w[0] = 0x0 | (s >> 12);
+		      w[1] = 0x80 | ((s >> 6) & 0x3f);
+		      w[2] = 0x80 | (s & 0x3f);
+		      f6 (x->c3, (char *) w, 3);
+		    }
+		  j = x->c2;
+		  a = b;
+		}
+	    }
+	  else
+	    {
+	      e = 7;
+	      goto out;
+	    }
+	  break;
+	case 11:
+	  if (__builtin_strncmp ("true", x->c1 + j, x->c2 - j) == 0)
+	    {
+	      if (x->c2 - j == 4)
+		{
+		  f = f12 (1);
+		  b = 2;
+		  a = 0;
+		}
+	      else
+		x->c2++;
+	    }
+	  else if (__builtin_strncmp ("false", x->c1 + j, x->c2 - j) == 0)
+	    {
+	      if (x->c2 - j == 5)
+		{
+		  f = f12 (0);
+		  b = 2;
+		  a = 0;
+		}
+	      else
+		x->c2++;
+	    }
+	  else
+	    {
+	      e = 3;
+	      goto out;
+	    }
+	  break;
+	case 12:
+	  if (!c || !__builtin_strchr (u, c))
+	    {
+	      if (!i)
+		f = f13 (0);
+	      else
+		f = f14 (0.0);
+	      b = 2;
+	      a = 0;
+	    }
+	  else
+	    {
+	      if (c == '.' || c == 'e')
+		i = 1;
+	      x->c2++;
+	    }
+	  break;
+	case 13:
+	  if (c == ']')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else
+	    {
+	      o = f2 (x);
+	      if (((unsigned long) o > (unsigned long) -4000L))
+		{
+		  e = 5;
+		  goto out;
+		}
+	      f3 (f, o);
+	      b = 14;
+	      a = 0;
+	    }
+	  break;
+	case 14:
+	  if (c == ']')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == ',')
+	    {
+	      x->c2++;
+	      b = 13;
+	      a = 0;
+	    }
+	  else
+	    {
+	      f9 (f);
+	      e = 5;
+	      goto out;
+	    }
+	  break;
+	case 15:
+	  a = 16;
+	  j = x->c2;
+	  break;
+	case 16:
+	  if (c == '}')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == '"' || c == '\'')
+	    {
+	      h = c;
+	      f7 (x->c3);
+	      a = 17;
+	      j = ++x->c2;
+	    }
+	  else
+	    {
+	      e = 6;
+	      goto out;
+	    }
+	  break;
+	case 17:
+	  if (c == h)
+	    {
+	      f6 (x->c3, x->c1 + j, x->c2 - j);
+	      g = __builtin_strdup (x->c3->b1);
+	      b = 18;
+	      a = 0;
+	    }
+	  else if (c == '\\')
+	    {
+	      b = 17;
+	      a = 9;
+	    }
+	  x->c2++;
+	  break;
+	case 18:
+	  if (c == ':')
+	    {
+	      x->c2++;
+	      b = 19;
+	      a = 0;
+	    }
+	  else
+	    {
+	      e = -6;
+	      goto out;
+	    }
+	  break;
+	case 19:
+	  o = f2 (x);
+	  if (((unsigned long) o > (unsigned long) -4000L))
+	    {
+	      e = 6;
+	      goto out;
+	    }
+	  f4 (f, g, o);
+	  __builtin_free (g);
+	  g = 0;
+	  b = 20;
+	  a = 0;
+	  break;
+	case 20:
+	  if (c == '}')
+	    {
+	      x->c2++;
+	      b = 2;
+	      a = 0;
+	    }
+	  else if (c == ',')
+	    {
+	      x->c2++;
+	      b = 15;
+	      a = 0;
+	    }
+	  else
+	    {
+	      e = 6;
+	      goto out;
+	    }
+	  break;
+	}
+    }
+  while (c);
+  if (a != 2 && b != 2)
+    e = 9;
+out:
+  __builtin_free (g);
+  if (e == 0)
+    return f;
+  f9 (f);
+  return 0;
+}
+
+int
+main ()
+{
+  asm volatile ("" : : : "memory");
+  struct A *r = f1 ("{ \"id\": null, \"blahah\": \"foobarbazbar\", \"barbar\": { \"barbarbarba\":"
+		    "\"abcdefgh\", \"ijklmnopqr\": \"stuvwxyzabcdefghijklmnopqrstuv\", \"xyzxyz\":"
+		    " [ \"1\" ] } }");
+  if (!r)
+    __builtin_abort ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-10.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-10.c
@@ -0,0 +1,24 @@
+/* PR 65177 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+typedef struct p7_profile_s {} P7_PROFILE;
+enum p7t_statetype_e {
+  p7T_S = 4,   p7T_N = 5,   p7T_E = 7,   p7T_C = 8,   p7T_J = 10, };
+typedef struct p7_trace_s {} P7_TRACE;
+typedef struct p7_gmx_s {
+  int L;
+} P7_GMX;
+static inline int select_c(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, int i) {
+  float path[2];
+  return ((path[0] > path[1]) ? p7T_C : p7T_E);
+}
+void p7_GOATrace(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, P7_TRACE *tr) {
+  int i = gx->L;
+  int sprv, scur;
+  while (sprv != p7T_S)     {
+    switch (sprv) {       case p7T_C: scur = select_c(gm, pp, gx, i); break;       }
+    if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;
+    sprv = scur;
+  }
+}
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c
@@ -35,6 +35,6 @@
 /* Whether the structs are totally scalarized or not depends on the
    MOVE_RATIO macro definition in the back end.  The scalarization will
    not take place when using small values for MOVE_RATIO.  */
-/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
-/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
@@ -21,5 +21,5 @@
   *p = l;
 }
 
-/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */
 /* { dg-final { cleanup-tree-dump "release_ssa" } } */
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
+/* { dg-final { scan-tree-dump-times "FSM" 19 "dom1" } } */
+/* { dg-final { cleanup-tree-dump "dom1" } } */
+
+enum STATE {
+  S0=0,
+  SI,
+  S1,
+  S2,
+  S3,
+  S4,
+  S5,
+  S6
+};
+
+int bar (enum STATE s);
+
+enum STATE foo (unsigned char **y, unsigned *c)
+{
+  unsigned char *x = *y;
+  unsigned char n;
+  enum STATE s = S0;
+
+  for( ; *x && s != SI; x++ )
+    {
+      n = *x;
+      if (n == 'x')
+	{
+	  x++;
+	  break;
+	}
+      switch(s)
+	{
+	case S0:
+	  if(bar(n))
+	    s = S3;
+	  else if( n == 'a' || n == 'b' )
+	    s = S1;
+	  else if( n == 'c' )
+	    s = S4;
+	  else
+	    {
+	      s = SI;
+	      c[SI]++;
+	    }
+	  c[S0]++;
+	  break;
+	case S1:
+	  if(bar(n))
+	    {
+	      s = S3;
+	      c[S1]++;
+	    }
+	  else if( n == 'c' )
+	    {
+	      s = S4;
+	      c[S1]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S1]++;
+	    }
+	  break;
+	case S3:
+	  if( n == 'c' )
+	    {
+	      s = S4;
+	      c[S3]++;
+	    }
+	  else if(!bar(n))
+	    {
+	      s = SI;
+	      c[S3]++;
+	    }
+	  break;
+	case S4:
+	  if( n == 'E' || n == 'e' )
+	    {
+	      s = S2;
+	      c[S4]++;
+	    }
+	  else if(!bar(n))
+	    {
+	      s = SI;
+	      c[S4]++;
+	    }
+	  break;
+	case S2:
+	  if( n == 'a' || n == 'b' )
+	    {
+	      s = S5;
+	      c[S2]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S2]++;
+	    }
+	  break;
+	case S5:
+	  if(bar(n))
+	    {
+	      s = S6;
+	      c[S5]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S5]++;
+	    }
+	  break;
+	case S6:
+	  if(!bar(n))
+	    {
+	      s = SI;
+	      c[SI]++;
+	    }
+	  break;
+	default:
+	  break;
+	}
+    }
+  *y=x;
+  return s;
+}
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-9.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-9.c
@@ -0,0 +1,50 @@
+/* PR 65048 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+int a, b, c, d;
+void fn (void);
+
+int
+foo (x)
+{
+  switch (x)
+    {
+    case 'A':
+      return 'T';
+    case 'U':
+      return 'A';
+    }
+}
+
+void
+bar (int x, int y)
+{
+  switch (c)
+    {
+    case 'U':
+      switch (x)
+	{
+	default:
+	  fn ();
+	case 'G':
+	  switch (y)
+	    {
+	    case 'A':
+	      d = 7;
+	    }
+	}
+    }
+}
+
+void
+baz (void)
+{
+  while (1)
+    {
+      a = foo ();
+      b = foo ();
+      bar (a, b);
+    }
+}
+
--- a/src/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
@@ -33,5 +33,5 @@
    that the && should be emitted (based on BRANCH_COST).  Fix this
    by teaching dom to look through && and register all components
    as true.  */
-/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* arm*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* sparc*-*-* spu-*-* x86_64-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* sparc*-*-* spu-*-* x86_64-*-*" } } } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
--- a/src/gcc/testsuite/gcc.dg/pr60114.c
+++ b/src/gcc/testsuite/gcc.dg/pr60114.c
@@ -0,0 +1,31 @@
+/* PR c/60114 */
+/* { dg-do compile } */
+/* { dg-options "-Wconversion" } */
+
+struct S { int n, u[2]; };
+const signed char z[] = {
+  [0] = 0x100, /* { dg-warning "9:overflow in implicit constant conversion" } */
+  [2] = 0x101, /* { dg-warning "9:overflow in implicit constant conversion" } */
+};
+int A[] = {
+            0, 0x80000000, /* { dg-warning "16:conversion of unsigned constant value to negative integer" } */
+            0xA, 0x80000000, /* { dg-warning "18:conversion of unsigned constant value to negative integer" } */
+            0xA, 0xA, 0x80000000 /* { dg-warning "23:conversion of unsigned constant value to negative integer" } */
+          };
+int *p = (int []) { 0x80000000 }; /* { dg-warning "21:conversion of unsigned constant value to negative integer" } */
+union { int k; } u = { .k = 0x80000000 }; /* { dg-warning "29:conversion of unsigned constant value to negative integer" } */
+typedef int H[];
+void
+foo (void)
+{
+  signed char a[][3] = { { 0x100, /* { dg-warning "28:overflow in implicit constant conversion" } */
+                    1, 0x100 }, /* { dg-warning "24:overflow in implicit constant conversion" } */
+                  { '\0', 0x100, '\0' } /* { dg-warning "27:overflow in implicit constant conversion" } */
+                };
+  (const signed char []) { 0x100 }; /* { dg-warning "28:overflow in implicit constant conversion" } */
+  (const float []) { 1e0, 1e1, 1e100 }; /* { dg-warning "32:conversion" } */
+  struct S s1 = { 0x80000000 }; /* { dg-warning "19:conversion of unsigned constant value to negative integer" } */
+  struct S s2 = { .n = 0x80000000 }; /* { dg-warning "24:conversion of unsigned constant value to negative integer" } */
+  struct S s3 = { .u[1] = 0x80000000 }; /* { dg-warning "27:conversion of unsigned constant value to negative integer" } */
+  H h = { 1, 2, 0x80000000 }; /* { dg-warning "17:conversion of unsigned constant value to negative integer" } */
+}
--- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c
@@ -0,0 +1,36 @@
+/* { dg-require-effective-target vect_int_mult } */
+/* { dg-require-effective-target whole_vector_shift } */
+
+/* Write a reduction loop to be reduced using vector shifts.  */
+
+extern void abort(void);
+
+unsigned char in[16];
+
+int
+main (unsigned char argc, char **argv)
+{
+  unsigned char i = 0;
+  unsigned char sum = 1;
+
+  for (i = 0; i < 16; i++)
+    in[i] = i + i + 1;
+
+  /* Prevent constant propagation of the entire loop below.  */
+  asm volatile ("" : : : "memory");
+
+  for (i = 0; i < 16; i++)
+    sum *= in[i];
+
+  if (sum != 33)
+    {
+      __builtin_printf("Failed %d\n", sum);
+      abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c
@@ -0,0 +1,32 @@
+/* { dg-require-effective-target vect_int_mult } */
+/* { dg-require-effective-target whole_vector_shift } */
+
+/* Write a reduction loop to be reduced using vector shifts and folded.  */
+
+extern void abort(void);
+
+int
+main (unsigned char argc, char **argv)
+{
+  unsigned char in[16];
+  unsigned char i = 0;
+  unsigned char sum = 1;
+
+  for (i = 0; i < 16; i++)
+    in[i] = i + i + 1;
+
+  for (i = 0; i < 16; i++)
+    sum *= in[i];
+
+  if (sum != 33)
+    {
+      __builtin_printf("Failed %d\n", sum);
+      abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
@@ -0,0 +1,35 @@
+/* { dg-require-effective-target whole_vector_shift } */
+
+/* Write a reduction loop to be reduced using vector shifts.  */
+
+extern void abort(void);
+
+unsigned char in[16] __attribute__((__aligned__(16)));
+
+int
+main (unsigned char argc, char **argv)
+{
+  unsigned char i = 0;
+  unsigned char sum = 1;
+
+  for (i = 0; i < 16; i++)
+    in[i] = (i + i + 1) & 0xfd;
+
+  /* Prevent constant propagation of the entire loop below.  */
+  asm volatile ("" : : : "memory");
+
+  for (i = 0; i < 16; i++)
+    sum |= in[i];
+
+  if (sum != 29)
+    {
+      __builtin_printf("Failed %d\n", sum);
+      abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo32 (unsigned int* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap32 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned int arr[N];
+  unsigned int expect[N];
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap32 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo32 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
@@ -0,0 +1,31 @@
+/* { dg-require-effective-target whole_vector_shift } */
+
+/* Write a reduction loop to be reduced using vector shifts and folded.  */
+
+extern void abort(void);
+
+int
+main (unsigned char argc, char **argv)
+{
+  unsigned char in[16] __attribute__((aligned(16)));
+  unsigned char i = 0;
+  unsigned char sum = 1;
+
+  for (i = 0; i < 16; i++)
+    in[i] = (i + i + 1) & 0xfd;
+
+  for (i = 0; i < 16; i++)
+    sum |= in[i];
+
+  if (sum != 29)
+    {
+      __builtin_printf("Failed %d\n", sum);
+      abort();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo16 (unsigned short int* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap16 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned short arr[N];
+  unsigned short expect[N];
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap16 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo16 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_bswap } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+volatile int y = 0;
+
+static inline void
+vfoo64 (unsigned long long* a)
+{
+  int i = 0;
+  for (i = 0; i < N; ++i)
+    a[i] = __builtin_bswap64 (a[i]);
+}
+
+int
+main (void)
+{
+  unsigned long long arr[N];
+  unsigned long long expect[N];
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      arr[i] = i;
+      expect[i] = __builtin_bswap64 (i);
+      if (y) /* Avoid vectorisation.  */
+        abort ();
+    }
+
+  vfoo64 (arr);
+
+  for (i = 0; i < N; ++i)
+    {
+      if (arr[i] != expect[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/ssp-3.c
+++ b/src/gcc/testsuite/gcc.dg/ssp-3.c
@@ -0,0 +1,16 @@
+/* { dg-do assemble } */
+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */
+/* { dg-require-effective-target fstack_protector } */
+
+extern int bar (const char *s, int *argc);
+extern int baz (const char *s);
+
+char
+foo (const char *s)
+{
+  int argc;
+  int ret;
+  if ( !bar (s, &argc))
+    ret = baz (s);
+  return *s;
+}
--- a/src/gcc/testsuite/g++.dg/ipa/devirt-25.C
+++ b/src/gcc/testsuite/g++.dg/ipa/devirt-25.C
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdump-ipa-cp"  } */
+/* { dg-add-options bind_pic_locally } */
 
 class ert_RefCounter {
  protected:
--- a/src/gcc/objcp/ChangeLog.linaro
+++ b/src/gcc/objcp/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/cp/ChangeLog.linaro
+++ b/src/gcc/cp/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/haifa-sched.c
+++ b/src/gcc/haifa-sched.c
@@ -239,6 +239,13 @@
 /* The minimal value of the INSN_TICK of an instruction.  */
 #define MIN_TICK (-max_insn_queue_index)
 
+/* Original order of insns in the ready list.
+   Used to keep order of normal insns while separating DEBUG_INSNs.  */
+#define INSN_RFS_DEBUG_ORIG_ORDER(INSN) (HID (INSN)->rfs_debug_orig_order)
+
+/* The deciding reason for INSN's place in the ready list.  */
+#define INSN_LAST_RFS_WIN(INSN) (HID (INSN)->last_rfs_win)
+
 /* List of important notes we must keep around.  This is a pointer to the
    last element in the list.  */
 rtx note_list;
@@ -345,7 +352,7 @@
 
 /* The following array is used to find the best insn from ready when
    the automaton pipeline interface is used.  */
-char *ready_try = NULL;
+signed char *ready_try = NULL;
 
 /* The ready list.  */
 struct ready_list ready = {NULL, 0, 0, 0, 0};
@@ -827,6 +834,7 @@
 /* Forward declarations.  */
 
 static int priority (rtx);
+static int autopref_rank_for_schedule (const rtx , const rtx);
 static int rank_for_schedule (const void *, const void *);
 static void swap_sort (rtx *, int);
 static void queue_insn (rtx, int, const char *);
@@ -859,8 +867,6 @@
 static void queue_to_ready (struct ready_list *);
 static int early_queue_to_ready (state_t, struct ready_list *);
 
-static void debug_ready_list (struct ready_list *);
-
 /* The following functions are used to implement multi-pass scheduling
    on the first cycle.  */
 static rtx ready_remove (struct ready_list *, int);
@@ -930,6 +936,13 @@
 /* Registers mentioned in the current region.  */
 static bitmap region_ref_regs;
 
+/* Effective number of available registers of a given class (see comment
+   in sched_pressure_start_bb).  */
+static int sched_class_regs_num[N_REG_CLASSES];
+/* Number of call_used_regs.  This is a helper for calculating of
+   sched_class_regs_num.  */
+static int call_used_regs_num[N_REG_CLASSES];
+
 /* Initiate register pressure relative info for scheduling the current
    region.  Currently it is only clearing register mentioned in the
    current region.  */
@@ -1113,7 +1126,7 @@
       gcc_assert (curr_reg_pressure[cl] >= 0);
       fprintf (sched_dump, "  %s:%d(%d)", reg_class_names[cl],
 	       curr_reg_pressure[cl],
-	       curr_reg_pressure[cl] - ira_class_hard_regs_num[cl]);
+	       curr_reg_pressure[cl] - sched_class_regs_num[cl]);
     }
   fprintf (sched_dump, "\n");
 }
@@ -1165,6 +1178,12 @@
   INSN_COST (insn) = -1;
   /* Invalidate INSN_TICK, so it'll be recalculated.  */
   INSN_TICK (insn) = INVALID_TICK;
+
+  /* Invalidate autoprefetch data entry.  */
+  INSN_AUTOPREF_MULTIPASS_DATA (insn)[0].status
+    = AUTOPREF_MULTIPASS_DATA_UNINITIALIZED;
+  INSN_AUTOPREF_MULTIPASS_DATA (insn)[1].status
+    = AUTOPREF_MULTIPASS_DATA_UNINITIALIZED;
 }
 
 
@@ -1203,6 +1222,11 @@
   if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK))
     return HARD_DEP;
 
+  /* If NEXT is intended to sit adjacent to this instruction, we don't
+     want to try to break any dependencies.  Treat it as a HARD_DEP.  */
+  if (SCHED_GROUP_P (next))
+    return HARD_DEP;
+
   /* Now we've got NEXT with speculative deps only.
      1. Look at the deps to see what we have to do.
      2. Check if we can do 'todo'.  */
@@ -1685,13 +1709,6 @@
 /* Macros and functions for keeping the priority queue sorted, and
    dealing with queuing and dequeuing of instructions.  */
 
-#define SCHED_SORT(READY, N_READY)                                   \
-do { if ((N_READY) == 2)				             \
-       swap_sort (READY, N_READY);			             \
-     else if ((N_READY) > 2)                                         \
-         qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); }  \
-while (0)
-
 /* For each pressure class CL, set DEATH[CL] to the number of registers
    in that class that die in INSN.  */
 
@@ -1733,9 +1750,9 @@
       cl = ira_pressure_classes[i];
       gcc_assert (curr_reg_pressure[cl] >= 0);
       change = (int) pressure_info[i].set_increase - death[cl];
-      before = MAX (0, max_reg_pressure[i] - ira_class_hard_regs_num[cl]);
+      before = MAX (0, max_reg_pressure[i] - sched_class_regs_num[cl]);
       after = MAX (0, max_reg_pressure[i] + change
-		   - ira_class_hard_regs_num[cl]);
+		   - sched_class_regs_num[cl]);
       hard_regno = ira_class_hard_regs[cl][0];
       gcc_assert (hard_regno >= 0);
       mode = reg_raw_mode[hard_regno];
@@ -2072,7 +2089,7 @@
 
       /* Check whether the maximum pressure in the overall schedule
 	 has increased.  (This means that the MODEL_MAX_PRESSURE of
-	 every point <= POINT will need to increae too; see below.)  */
+	 every point <= POINT will need to increase too; see below.)  */
       if (group->limits[pci].pressure < ref_pressure)
 	group->limits[pci].pressure = ref_pressure;
 
@@ -2349,7 +2366,7 @@
 /* Return the cost of increasing the pressure in class CL from FROM to TO.
 
    Here we use the very simplistic cost model that every register above
-   ira_class_hard_regs_num[CL] has a spill cost of 1.  We could use other
+   sched_class_regs_num[CL] has a spill cost of 1.  We could use other
    measures instead, such as one based on MEMORY_MOVE_COST.  However:
 
       (1) In order for an instruction to be scheduled, the higher cost
@@ -2373,7 +2390,7 @@
 static int
 model_spill_cost (int cl, int from, int to)
 {
-  from = MAX (from, ira_class_hard_regs_num[cl]);
+  from = MAX (from, sched_class_regs_num[cl]);
   return MAX (to, from) - from;
 }
 
@@ -2479,7 +2496,7 @@
   bool print_p;
 
   /* Record the baseECC value for each instruction in the model schedule,
-     except that negative costs are converted to zero ones now rather thatn
+     except that negative costs are converted to zero ones now rather than
      later.  Do not assign a cost to debug instructions, since they must
      not change code-generation decisions.  Experiments suggest we also
      get better results by not assigning a cost to instructions from
@@ -2527,6 +2544,62 @@
     }
 }
 
+
+/* Enum of rank_for_schedule heuristic decisions.  */
+enum rfs_decision {
+  RFS_LIVE_RANGE_SHRINK1, RFS_LIVE_RANGE_SHRINK2,
+  RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
+  RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
+  RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
+  RFS_DEP_COUNT, RFS_TIE, RFS_N };
+
+/* Corresponding strings for print outs.  */
+static const char *rfs_str[RFS_N] = {
+  "RFS_LIVE_RANGE_SHRINK1", "RFS_LIVE_RANGE_SHRINK2",
+  "RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
+  "RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
+  "RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
+  "RFS_DEP_COUNT", "RFS_TIE" };
+
+/* Statistical breakdown of rank_for_schedule decisions.  */
+typedef struct { unsigned stats[RFS_N]; } rank_for_schedule_stats_t;
+static rank_for_schedule_stats_t rank_for_schedule_stats;
+
+/* Return the result of comparing insns TMP and TMP2 and update
+   Rank_For_Schedule statistics.  */
+static int
+rfs_result (enum rfs_decision decision, int result, rtx tmp, rtx tmp2)
+{
+  ++rank_for_schedule_stats.stats[decision];
+  if (result < 0)
+    INSN_LAST_RFS_WIN (tmp) = decision;
+  else if (result > 0)
+    INSN_LAST_RFS_WIN (tmp2) = decision;
+  else
+    gcc_unreachable ();
+  return result;
+}
+
+/* Sorting predicate to move DEBUG_INSNs to the top of ready list, while
+   keeping normal insns in original order.  */
+
+static int
+rank_for_schedule_debug (const void *x, const void *y)
+{
+  rtx tmp = *(rtx const *) y;
+  rtx tmp2 = *(rtx const *) x;
+
+  /* Schedule debug insns as early as possible.  */
+  if (DEBUG_INSN_P (tmp) && !DEBUG_INSN_P (tmp2))
+    return -1;
+  else if (!DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2))
+    return 1;
+  else if (DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2))
+    return INSN_LUID (tmp) - INSN_LUID (tmp2);
+  else
+    return INSN_RFS_DEBUG_ORIG_ORDER (tmp2) - INSN_RFS_DEBUG_ORIG_ORDER (tmp);
+}
+
 /* Returns a positive value if x is preferred; returns a negative value if
    y is preferred.  Should never return 0, since that will make the sort
    unstable.  */
@@ -2539,17 +2612,6 @@
   int tmp_class, tmp2_class;
   int val, priority_val, info_val, diff;
 
-  if (MAY_HAVE_DEBUG_INSNS)
-    {
-      /* Schedule debug insns as early as possible.  */
-      if (DEBUG_INSN_P (tmp) && !DEBUG_INSN_P (tmp2))
-	return -1;
-      else if (!DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2))
-	return 1;
-      else if (DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2))
-	return INSN_LUID (tmp) - INSN_LUID (tmp2);
-    }
-
   if (live_range_shrinkage_p)
     {
       /* Don't use SCHED_PRESSURE_MODEL -- it results in much worse
@@ -2559,17 +2621,19 @@
 	   || INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2) < 0)
 	  && (diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp)
 		      - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2))) != 0)
-	return diff;
+	return rfs_result (RFS_LIVE_RANGE_SHRINK1, diff, tmp, tmp2);
       /* Sort by INSN_LUID (original insn order), so that we make the
 	 sort stable.  This minimizes instruction movement, thus
 	 minimizing sched's effect on debugging and cross-jumping.  */
-      return INSN_LUID (tmp) - INSN_LUID (tmp2);
+      return rfs_result (RFS_LIVE_RANGE_SHRINK2,
+			 INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
     }
 
   /* The insn in a schedule group should be issued the first.  */
   if (flag_sched_group_heuristic &&
       SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
-    return SCHED_GROUP_P (tmp2) ? 1 : -1;
+    return rfs_result (RFS_SCHED_GROUP, SCHED_GROUP_P (tmp2) ? 1 : -1,
+		       tmp, tmp2);
 
   /* Make sure that priority of TMP and TMP2 are initialized.  */
   gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
@@ -2582,18 +2646,15 @@
 		   + insn_delay (tmp)
 		   - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2)
 		   - insn_delay (tmp2))))
-	return diff;
+	return rfs_result (RFS_PRESSURE_DELAY, diff, tmp, tmp2);
     }
 
   if (sched_pressure != SCHED_PRESSURE_NONE
-      && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var))
+      && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var)
+      && INSN_TICK (tmp2) != INSN_TICK (tmp))
     {
-      if (INSN_TICK (tmp) <= clock_var)
-	return -1;
-      else if (INSN_TICK (tmp2) <= clock_var)
-	return 1;
-      else
-	return INSN_TICK (tmp) - INSN_TICK (tmp2);
+      diff = INSN_TICK (tmp) - INSN_TICK (tmp2);
+      return rfs_result (RFS_PRESSURE_TICK, diff, tmp, tmp2);
     }
 
   /* If we are doing backtracking in this schedule, prefer insns that
@@ -2603,7 +2664,7 @@
     {
       priority_val = FEEDS_BACKTRACK_INSN (tmp2) - FEEDS_BACKTRACK_INSN (tmp);
       if (priority_val)
-	return priority_val;
+	return rfs_result (RFS_FEEDS_BACKTRACK_INSN, priority_val, tmp, tmp2);
     }
 
   /* Prefer insn with higher priority.  */
@@ -2610,8 +2671,15 @@
   priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
 
   if (flag_sched_critical_path_heuristic && priority_val)
-    return priority_val;
+    return rfs_result (RFS_PRIORITY, priority_val, tmp, tmp2);
 
+  if (PARAM_VALUE (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH) >= 0)
+    {
+      int autopref = autopref_rank_for_schedule (tmp, tmp2);
+      if (autopref != 0)
+	return autopref;
+    }
+
   /* Prefer speculative insn with greater dependencies weakness.  */
   if (flag_sched_spec_insn_heuristic && spec_info)
     {
@@ -2633,12 +2701,12 @@
 
       dw = dw2 - dw1;
       if (dw > (NO_DEP_WEAK / 8) || dw < -(NO_DEP_WEAK / 8))
-	return dw;
+	return rfs_result (RFS_SPECULATION, dw, tmp, tmp2);
     }
 
   info_val = (*current_sched_info->rank) (tmp, tmp2);
   if (flag_sched_rank_heuristic && info_val)
-    return info_val;
+    return rfs_result (RFS_SCHED_RANK, info_val, tmp, tmp2);
 
   /* Compare insns based on their relation to the last scheduled
      non-debug insn.  */
@@ -2674,17 +2742,16 @@
 	tmp2_class = 2;
 
       if ((val = tmp2_class - tmp_class))
-	return val;
+	return rfs_result (RFS_LAST_INSN, val, tmp, tmp2);
     }
 
   /* Prefer instructions that occur earlier in the model schedule.  */
-  if (sched_pressure == SCHED_PRESSURE_MODEL)
+  if (sched_pressure == SCHED_PRESSURE_MODEL
+      && INSN_BB (tmp) == target_bb && INSN_BB (tmp2) == target_bb)
     {
-      int diff;
-
       diff = model_index (tmp) - model_index (tmp2);
-      if (diff != 0)
-	return diff;
+      gcc_assert (diff != 0);
+      return rfs_result (RFS_PRESSURE_INDEX, diff, tmp, tmp2);
     }
 
   /* Prefer the insn which has more later insns that depend on it.
@@ -2695,12 +2762,12 @@
 	 - dep_list_size (tmp, SD_LIST_FORW));
 
   if (flag_sched_dep_count_heuristic && val != 0)
-    return val;
+    return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
 
   /* If insns are equally good, sort by INSN_LUID (original insn order),
      so that we make the sort stable.  This minimizes instruction movement,
      thus minimizing sched's effect on debugging and cross-jumping.  */
-  return INSN_LUID (tmp) - INSN_LUID (tmp2);
+  return rfs_result (RFS_TIE, INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
 }
 
 /* Resort the array A in which only element at index N may be out of order.  */
@@ -2905,27 +2972,100 @@
   gcc_unreachable ();
 }
 
-/* Sort the ready list READY by ascending priority, using the SCHED_SORT
-   macro.  */
+/* Calculate difference of two statistics set WAS and NOW.
+   Result returned in WAS.  */
+static void
+rank_for_schedule_stats_diff (rank_for_schedule_stats_t *was,
+			      const rank_for_schedule_stats_t *now)
+{
+  for (int i = 0; i < RFS_N; ++i)
+    was->stats[i] = now->stats[i] - was->stats[i];
+}
 
-void
-ready_sort (struct ready_list *ready)
+/* Print rank_for_schedule statistics.  */
+static void
+print_rank_for_schedule_stats (const char *prefix,
+			       const rank_for_schedule_stats_t *stats,
+			       struct ready_list *ready)
 {
+  for (int i = 0; i < RFS_N; ++i)
+    if (stats->stats[i])
+      {
+	fprintf (sched_dump, "%s%20s: %u", prefix, rfs_str[i], stats->stats[i]);
+
+	if (ready != NULL)
+	  /* Print out insns that won due to RFS_<I>.  */
+	  {
+	    rtx *p = ready_lastpos (ready);
+
+	    fprintf (sched_dump, ":");
+	    /* Start with 1 since least-priority insn didn't have any wins.  */
+	    for (int j = 1; j < ready->n_ready; ++j)
+	      if (INSN_LAST_RFS_WIN (p[j]) == i)
+		fprintf (sched_dump, " %s",
+			 (*current_sched_info->print_insn) (p[j], 0));
+	  }
+	fprintf (sched_dump, "\n");
+      }
+}
+
+/* Separate DEBUG_INSNS from normal insns.  DEBUG_INSNs go to the end
+   of array.  */
+static void
+ready_sort_debug (struct ready_list *ready)
+{
   int i;
   rtx *first = ready_lastpos (ready);
 
+  for (i = 0; i < ready->n_ready; ++i)
+    if (!DEBUG_INSN_P (first[i]))
+      INSN_RFS_DEBUG_ORIG_ORDER (first[i]) = i;
+
+  qsort (first, ready->n_ready, sizeof (rtx), rank_for_schedule_debug);
+}
+
+/* Sort non-debug insns in the ready list READY by ascending priority.
+   Assumes that all debug insns are separated from the real insns.  */
+static void
+ready_sort_real (struct ready_list *ready)
+{
+  int i;
+  rtx *first = ready_lastpos (ready);
+  int n_ready_real = ready->n_ready - ready->n_debug;
+
   if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+    for (i = 0; i < n_ready_real; ++i)
+      setup_insn_reg_pressure_info (first[i]);
+  else if (sched_pressure == SCHED_PRESSURE_MODEL
+	   && model_curr_point < model_num_insns)
+    model_set_excess_costs (first, n_ready_real);
+
+  rank_for_schedule_stats_t stats1;
+  if (sched_verbose >= 4)
+    stats1 = rank_for_schedule_stats;
+
+  if (n_ready_real == 2)
+    swap_sort (first, n_ready_real);
+  else if (n_ready_real > 2)
+    qsort (first, n_ready_real, sizeof (rtx), rank_for_schedule);
+
+  if (sched_verbose >= 4)
     {
-      for (i = 0; i < ready->n_ready; i++)
-	if (!DEBUG_INSN_P (first[i]))
-	  setup_insn_reg_pressure_info (first[i]);
+      rank_for_schedule_stats_diff (&stats1, &rank_for_schedule_stats);
+      print_rank_for_schedule_stats (";;\t\t", &stats1, ready);
     }
-  if (sched_pressure == SCHED_PRESSURE_MODEL
-      && model_curr_point < model_num_insns)
-    model_set_excess_costs (first, ready->n_ready);
-  SCHED_SORT (first, ready->n_ready);
 }
 
+/* Sort the ready list READY by ascending priority.  */
+static void
+ready_sort (struct ready_list *ready)
+{
+  if (ready->n_debug > 0)
+    ready_sort_debug (ready);
+  else
+    ready_sort_real (ready);
+}
+
 /* PREV is an insn that is ready to execute.  Adjust its priority if that
    will help shorten or lengthen register lifetimes as appropriate.  Also
    provide a hook for the target to tweak itself.  */
@@ -2971,7 +3111,7 @@
 advance_one_cycle (void)
 {
   advance_state (curr_state);
-  if (sched_verbose >= 6)
+  if (sched_verbose >= 4)
     fprintf (sched_dump, ";;\tAdvance the current state.\n");
 }
 
@@ -3670,15 +3810,13 @@
    scheduling region.  */
 
 static void
-model_start_schedule (void)
+model_start_schedule (basic_block bb)
 {
-  basic_block bb;
-
   model_next_priority = 1;
   model_schedule.create (sched_max_luid);
   model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid);
 
-  bb = BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head));
+  gcc_assert (bb == BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head)));
   initiate_reg_pressure_info (df_get_live_in (bb));
 
   model_analyze_insns ();
@@ -3716,6 +3854,53 @@
   model_finalize_pressure_group (&model_before_pressure);
   model_schedule.release ();
 }
+
+/* Prepare reg pressure scheduling for basic block BB.  */
+static void
+sched_pressure_start_bb (basic_block bb)
+{
+  /* Set the number of available registers for each class taking into account
+     relative probability of current basic block versus function prologue and
+     epilogue.
+     * If the basic block executes much more often than the prologue/epilogue
+     (e.g., inside a hot loop), then cost of spill in the prologue is close to
+     nil, so the effective number of available registers is
+     (ira_class_hard_regs_num[cl] - 0).
+     * If the basic block executes as often as the prologue/epilogue,
+     then spill in the block is as costly as in the prologue, so the effective
+     number of available registers is
+     (ira_class_hard_regs_num[cl] - call_used_regs_num[cl]).
+     Note that all-else-equal, we prefer to spill in the prologue, since that
+     allows "extra" registers for other basic blocks of the function.
+     * If the basic block is on the cold path of the function and executes
+     rarely, then we should always prefer to spill in the block, rather than
+     in the prologue/epilogue.  The effective number of available register is
+     (ira_class_hard_regs_num[cl] - call_used_regs_num[cl]).  */
+  {
+    int i;
+    int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency;
+    int bb_freq = bb->frequency;
+
+    if (bb_freq == 0)
+      {
+	if (entry_freq == 0)
+	  entry_freq = bb_freq = 1;
+      }
+    if (bb_freq < entry_freq)
+      bb_freq = entry_freq;
+
+    for (i = 0; i < ira_pressure_classes_num; ++i)
+      {
+	enum reg_class cl = ira_pressure_classes[i];
+	sched_class_regs_num[cl] = ira_class_hard_regs_num[cl];
+	sched_class_regs_num[cl]
+	  -= (call_used_regs_num[cl] * entry_freq) / bb_freq;
+      }
+  }
+
+  if (sched_pressure == SCHED_PRESSURE_MODEL)
+    model_start_schedule (bb);
+}
 
 /* A structure that holds local state for the loop in schedule_block.  */
 struct sched_block_state
@@ -3750,7 +3935,7 @@
   if (sched_verbose >= 1)
     {
       struct reg_pressure_data *pressure_info;
-      fprintf (sched_dump, ";;\t%3i--> %s%-40s:",
+      fprintf (sched_dump, ";;\t%3i--> %s %-40s:",
 	       clock_var, (*current_sched_info->print_insn) (insn, 1),
 	       str_pattern_slim (PATTERN (insn)));
 
@@ -3944,6 +4129,10 @@
       last_clock_var = clock_var;
     }
 
+  if (nonscheduled_insns_begin != NULL_RTX)
+    /* Indicate to debug counters that INSN is scheduled.  */
+    nonscheduled_insns_begin = insn;
+
   return advance;
 }
 
@@ -4048,6 +4237,7 @@
 
   rtx last_scheduled_insn;
   rtx last_nondebug_scheduled_insn;
+  rtx nonscheduled_insns_begin;
   int cycle_issued_insns;
 
   /* Copies of state used in the inner loop of schedule_block.  */
@@ -4120,6 +4310,7 @@
   save->cycle_issued_insns = cycle_issued_insns;
   save->last_scheduled_insn = last_scheduled_insn;
   save->last_nondebug_scheduled_insn = last_nondebug_scheduled_insn;
+  save->nonscheduled_insns_begin = nonscheduled_insns_begin;
 
   save->sched_block = sched_block;
 
@@ -4375,6 +4566,7 @@
   cycle_issued_insns = save->cycle_issued_insns;
   last_scheduled_insn = save->last_scheduled_insn;
   last_nondebug_scheduled_insn = save->last_nondebug_scheduled_insn;
+  nonscheduled_insns_begin = save->nonscheduled_insns_begin;
 
   *psched_block = save->sched_block;
 
@@ -4843,6 +5035,24 @@
     }
 }
 
+/* Return first non-scheduled insn in the current scheduling block.
+   This is mostly used for debug-counter purposes.  */
+static rtx
+first_nonscheduled_insn (void)
+{
+  rtx insn = (nonscheduled_insns_begin != NULL_RTX
+	      ? nonscheduled_insns_begin
+	      : current_sched_info->prev_head);
+
+  do
+    {
+      insn = next_nonnote_nondebug_insn (insn);
+    }
+  while (QUEUE_INDEX (insn) == QUEUE_SCHEDULED);
+
+  return insn;
+}
+
 /* Move insns that became ready to fire from queue to ready list.  */
 
 static void
@@ -4855,16 +5065,9 @@
   q_ptr = NEXT_Q (q_ptr);
 
   if (dbg_cnt (sched_insn) == false)
-    {
-      /* If debug counter is activated do not requeue the first
-	 nonscheduled insn.  */
-      skip_insn = nonscheduled_insns_begin;
-      do
-	{
-	  skip_insn = next_nonnote_nondebug_insn (skip_insn);
-	}
-      while (QUEUE_INDEX (skip_insn) == QUEUE_SCHEDULED);
-    }
+    /* If debug counter is activated do not requeue the first
+       nonscheduled insn.  */
+    skip_insn = first_nonscheduled_insn ();
   else
     skip_insn = NULL_RTX;
 
@@ -4894,7 +5097,11 @@
 	       && model_index (insn) == model_curr_point)
 	  && !SCHED_GROUP_P (insn)
 	  && insn != skip_insn)
-	queue_insn (insn, 1, "ready full");
+	{
+	  if (sched_verbose >= 2)
+	    fprintf (sched_dump, "keeping in queue, ready full\n");
+	  queue_insn (insn, 1, "ready full");
+	}
       else
 	{
 	  ready_add (ready, insn, false);
@@ -4939,6 +5146,9 @@
 
       q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
       clock_var += stalls;
+      if (sched_verbose >= 2)
+	fprintf (sched_dump, ";;\tAdvancing clock by %d cycle[s] to %d\n",
+		 stalls, clock_var);
     }
 }
 
@@ -5099,10 +5309,11 @@
 }
 
 
-/* Print the ready list for debugging purposes.  Callable from debugger.  */
-
+/* Print the ready list for debugging purposes.
+   If READY_TRY is non-zero then only print insns that max_issue
+   will consider.  */
 static void
-debug_ready_list (struct ready_list *ready)
+debug_ready_list_1 (struct ready_list *ready, signed char *ready_try)
 {
   rtx *p;
   int i;
@@ -5116,6 +5327,9 @@
   p = ready_lastpos (ready);
   for (i = 0; i < ready->n_ready; i++)
     {
+      if (ready_try != NULL && ready_try[ready->n_ready - i - 1])
+	continue;
+
       fprintf (sched_dump, "  %s:%d",
 	       (*current_sched_info->print_insn) (p[i], 0),
 	       INSN_LUID (p[i]));
@@ -5122,8 +5336,12 @@
       if (sched_pressure != SCHED_PRESSURE_NONE)
 	fprintf (sched_dump, "(cost=%d",
 		 INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i]));
+      fprintf (sched_dump, ":prio=%d", INSN_PRIORITY (p[i]));
       if (INSN_TICK (p[i]) > clock_var)
 	fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var);
+      if (sched_pressure == SCHED_PRESSURE_MODEL)
+	fprintf (sched_dump, ":idx=%d",
+		 model_index (p[i]));
       if (sched_pressure != SCHED_PRESSURE_NONE)
 	fprintf (sched_dump, ")");
     }
@@ -5130,6 +5348,13 @@
   fprintf (sched_dump, "\n");
 }
 
+/* Print the ready list.  Callable from debugger.  */
+static void
+debug_ready_list (struct ready_list *ready)
+{
+  debug_ready_list_1 (ready, NULL);
+}
+
 /* Search INSN for REG_SAVE_NOTE notes and convert them back into insn
    NOTEs.  This is used for NOTE_INSN_EPILOGUE_BEG, so that sched-ebb
    replaces the epilogue note in the correct basic block.  */
@@ -5252,6 +5477,241 @@
   return false;
 }
 
+/* Functions to model cache auto-prefetcher.
+
+   Some of the CPUs have cache auto-prefetcher, which /seems/ to initiate
+   memory prefetches if it sees instructions with consequitive memory accesses
+   in the instruction stream.  Details of such hardware units are not published,
+   so we can only guess what exactly is going on there.
+   In the scheduler, we model abstract auto-prefetcher.  If there are memory
+   insns in the ready list (or the queue) that have same memory base, but
+   different offsets, then we delay the insns with larger offsets until insns
+   with smaller offsets get scheduled.  If PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
+   is "1", then we look at the ready list; if it is N>1, then we also look
+   through N-1 queue entries.
+   If the param is N>=0, then rank_for_schedule will consider auto-prefetching
+   among its heuristics.
+   Param value of "-1" disables modelling of the auto-prefetcher.  */
+
+/* Initialize autoprefetcher model data for INSN.  */
+static void
+autopref_multipass_init (const rtx insn, int write)
+{
+  autopref_multipass_data_t data = &INSN_AUTOPREF_MULTIPASS_DATA (insn)[write];
+
+  gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
+  data->base = NULL_RTX;
+  data->offset = 0;
+  /* Set insn entry initialized, but not relevant for auto-prefetcher.  */
+  data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
+
+  rtx set = single_set (insn);
+  if (set == NULL_RTX)
+    return;
+
+  rtx mem = write ? SET_DEST (set) : SET_SRC (set);
+  if (!MEM_P (mem))
+    return;
+
+  struct address_info info;
+  decompose_mem_address (&info, mem);
+
+  /* TODO: Currently only (base+const) addressing is supported.  */
+  if (info.base == NULL || !REG_P (*info.base)
+      || (info.disp != NULL && !CONST_INT_P (*info.disp)))
+    return;
+
+  /* This insn is relevant for auto-prefetcher.  */
+  data->base = *info.base;
+  data->offset = info.disp ? INTVAL (*info.disp) : 0;
+  data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
+}
+
+/* Helper function for rank_for_schedule sorting.  */
+static int
+autopref_rank_for_schedule (const rtx insn1, const rtx insn2)
+{
+  for (int write = 0; write < 2; ++write)
+    {
+      autopref_multipass_data_t data1
+	= &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write];
+      autopref_multipass_data_t data2
+	= &INSN_AUTOPREF_MULTIPASS_DATA (insn2)[write];
+
+      if (data1->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED)
+	autopref_multipass_init (insn1, write);
+      if (data1->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT)
+	continue;
+
+      if (data2->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED)
+	autopref_multipass_init (insn2, write);
+      if (data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT)
+	continue;
+
+      if (!rtx_equal_p (data1->base, data2->base))
+	continue;
+
+      return data1->offset - data2->offset;
+    }
+
+  return 0;
+}
+
+/* True if header of debug dump was printed.  */
+static bool autopref_multipass_dfa_lookahead_guard_started_dump_p;
+
+/* Helper for autopref_multipass_dfa_lookahead_guard.
+   Return "1" if INSN1 should be delayed in favor of INSN2.  */
+static int
+autopref_multipass_dfa_lookahead_guard_1 (const rtx insn1,
+					  const rtx insn2, int write)
+{
+  autopref_multipass_data_t data1
+    = &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write];
+  autopref_multipass_data_t data2
+    = &INSN_AUTOPREF_MULTIPASS_DATA (insn2)[write];
+
+  if (data2->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED)
+    autopref_multipass_init (insn2, write);
+  if (data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT)
+    return 0;
+
+  if (rtx_equal_p (data1->base, data2->base)
+      && data1->offset > data2->offset)
+    {
+      if (sched_verbose >= 2)
+	{
+          if (!autopref_multipass_dfa_lookahead_guard_started_dump_p)
+	    {
+	      fprintf (sched_dump,
+		       ";;\t\tnot trying in max_issue due to autoprefetch "
+		       "model: ");
+	      autopref_multipass_dfa_lookahead_guard_started_dump_p = true;
+	    }
+
+	  fprintf (sched_dump, " %d(%d)", INSN_UID (insn1), INSN_UID (insn2));
+	}
+
+      return 1;
+    }
+
+  return 0;
+}
+
+/* General note:
+
+   We could have also hooked autoprefetcher model into
+   first_cycle_multipass_backtrack / first_cycle_multipass_issue hooks
+   to enable intelligent selection of "[r1+0]=r2; [r1+4]=r3" on the same cycle
+   (e.g., once "[r1+0]=r2" is issued in max_issue(), "[r1+4]=r3" gets
+   unblocked).  We don't bother about this yet because target of interest
+   (ARM Cortex-A15) can issue only 1 memory operation per cycle.  */
+
+/* Implementation of first_cycle_multipass_dfa_lookahead_guard hook.
+   Return "1" if INSN1 should not be considered in max_issue due to
+   auto-prefetcher considerations.  */
+int
+autopref_multipass_dfa_lookahead_guard (rtx insn1, int ready_index)
+{
+  int r = 0;
+
+  if (PARAM_VALUE (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH) <= 0)
+    return 0;
+
+  if (sched_verbose >= 2 && ready_index == 0)
+    autopref_multipass_dfa_lookahead_guard_started_dump_p = false;
+
+  for (int write = 0; write < 2; ++write)
+    {
+      autopref_multipass_data_t data1
+	= &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write];
+
+      if (data1->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED)
+	autopref_multipass_init (insn1, write);
+      if (data1->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT)
+	continue;
+
+      if (ready_index == 0
+	  && data1->status == AUTOPREF_MULTIPASS_DATA_DONT_DELAY)
+	/* We allow only a single delay on priviledged instructions.
+	   Doing otherwise would cause infinite loop.  */
+	{
+	  if (sched_verbose >= 2)
+	    {
+	      if (!autopref_multipass_dfa_lookahead_guard_started_dump_p)
+		{
+		  fprintf (sched_dump,
+			   ";;\t\tnot trying in max_issue due to autoprefetch "
+			   "model: ");
+		  autopref_multipass_dfa_lookahead_guard_started_dump_p = true;
+		}
+
+	      fprintf (sched_dump, " *%d*", INSN_UID (insn1));
+	    }
+	  continue;
+	}
+
+      for (int i2 = 0; i2 < ready.n_ready; ++i2)
+	{
+	  rtx insn2 = get_ready_element (i2);
+	  if (insn1 == insn2)
+	    continue;
+	  r = autopref_multipass_dfa_lookahead_guard_1 (insn1, insn2, write);
+	  if (r)
+	    {
+	      if (ready_index == 0)
+		{
+		  r = -1;
+		  data1->status = AUTOPREF_MULTIPASS_DATA_DONT_DELAY;
+		}
+	      goto finish;
+	    }
+	}
+
+      if (PARAM_VALUE (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH) == 1)
+	continue;
+
+      /* Everything from the current queue slot should have been moved to
+	 the ready list.  */
+      gcc_assert (insn_queue[NEXT_Q_AFTER (q_ptr, 0)] == NULL_RTX);
+
+      int n_stalls = PARAM_VALUE (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH) - 1;
+      if (n_stalls > max_insn_queue_index)
+	n_stalls = max_insn_queue_index;
+
+      for (int stalls = 1; stalls <= n_stalls; ++stalls)
+	{
+	  for (rtx link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)];
+	       link != NULL_RTX;
+	       link = XEXP (link, 1))
+	    {
+	      rtx insn2 = XEXP (link, 0);
+	      r = autopref_multipass_dfa_lookahead_guard_1 (insn1, insn2,
+							    write);
+	      if (r)
+		{
+		  /* Queue INSN1 until INSN2 can issue.  */
+		  r = -stalls;
+		  if (ready_index == 0)
+		    data1->status = AUTOPREF_MULTIPASS_DATA_DONT_DELAY;
+		  goto finish;
+		}
+	    }
+	}
+    }
+
+    finish:
+  if (sched_verbose >= 2
+      && autopref_multipass_dfa_lookahead_guard_started_dump_p
+      && (ready_index == ready.n_ready - 1 || r < 0))
+    /* This does not /always/ trigger.  We don't output EOL if the last
+       insn is not recognized (INSN_CODE < 0) and lookahead_guard is not
+       called.  We can live with this.  */
+    fprintf (sched_dump, "\n");
+
+  return r;
+}
+
 /* Define type for target data used in multipass scheduling.  */
 #ifndef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T
 # define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T int
@@ -5291,15 +5751,6 @@
    could achieve DFA_LOOKAHEAD ** N , where N is the queue length.  */
 static int max_lookahead_tries;
 
-/* The following value is value of hook
-   `first_cycle_multipass_dfa_lookahead' at the last call of
-   `max_issue'.  */
-static int cached_first_cycle_multipass_dfa_lookahead = 0;
-
-/* The following value is value of `issue_rate' at the last call of
-   `sched_init'.  */
-static int cached_issue_rate = 0;
-
 /* The following function returns maximal (or close to maximal) number
    of insns which can be issued on the same cycle and one of which
    insns is insns with the best rank (the first insn in READY).  To
@@ -5328,9 +5779,8 @@
 	      && privileged_n <= n_ready);
 
   /* Init MAX_LOOKAHEAD_TRIES.  */
-  if (cached_first_cycle_multipass_dfa_lookahead != dfa_lookahead)
+  if (max_lookahead_tries == 0)
     {
-      cached_first_cycle_multipass_dfa_lookahead = dfa_lookahead;
       max_lookahead_tries = 100;
       for (i = 0; i < issue_rate; i++)
 	max_lookahead_tries *= dfa_lookahead;
@@ -5359,6 +5809,12 @@
     if (!ready_try [i])
       all++;
 
+  if (sched_verbose >= 2)
+    {
+      fprintf (sched_dump, ";;\t\tmax_issue among %d insns:", all);
+      debug_ready_list_1 (ready, ready_try);
+    }
+
   /* I is the index of the insn to try next.  */
   i = 0;
   tries_num = 0;
@@ -5487,21 +5943,16 @@
 choose_ready (struct ready_list *ready, bool first_cycle_insn_p,
 	      rtx *insn_ptr)
 {
-  int lookahead;
-
   if (dbg_cnt (sched_insn) == false)
     {
-      rtx insn = nonscheduled_insns_begin;
-      do
-	{
-	  insn = next_nonnote_insn (insn);
-	}
-      while (QUEUE_INDEX (insn) == QUEUE_SCHEDULED);
+      if (nonscheduled_insns_begin == NULL_RTX)
+	nonscheduled_insns_begin = current_sched_info->prev_head;
 
+      rtx insn = first_nonscheduled_insn ();
+
       if (QUEUE_INDEX (insn) == QUEUE_READY)
 	/* INSN is in the ready_list.  */
 	{
-	  nonscheduled_insns_begin = insn;
 	  ready_remove_insn (insn);
 	  *insn_ptr = insn;
 	  return 0;
@@ -5508,14 +5959,11 @@
 	}
 
       /* INSN is in the queue.  Advance cycle to move it to the ready list.  */
+      gcc_assert (QUEUE_INDEX (insn) >= 0);
       return -1;
     }
 
-  lookahead = 0;
-
-  if (targetm.sched.first_cycle_multipass_dfa_lookahead)
-    lookahead = targetm.sched.first_cycle_multipass_dfa_lookahead ();
-  if (lookahead <= 0 || SCHED_GROUP_P (ready_element (ready, 0))
+  if (dfa_lookahead <= 0 || SCHED_GROUP_P (ready_element (ready, 0))
       || DEBUG_INSN_P (ready_element (ready, 0)))
     {
       if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON))
@@ -5527,11 +5975,9 @@
     }
   else
     {
-      /* Try to choose the better insn.  */
-      int index = 0, i, n;
+      /* Try to choose the best insn.  */
+      int index = 0, i;
       rtx insn;
-      int try_data = 1, try_control = 1;
-      ds_t ts;
 
       insn = ready_element (ready, 0);
       if (INSN_CODE (insn) < 0)
@@ -5540,84 +5986,57 @@
 	  return 0;
 	}
 
-      if (spec_info
-	  && spec_info->flags & (PREFER_NON_DATA_SPEC
-				 | PREFER_NON_CONTROL_SPEC))
+      /* Filter the search space.  */
+      for (i = 0; i < ready->n_ready; i++)
 	{
-	  for (i = 0, n = ready->n_ready; i < n; i++)
+	  ready_try[i] = 0;
+
+	  insn = ready_element (ready, i);
+
+	  /* If this insn is recognizable we should have already
+	     recognized it earlier.
+	     ??? Not very clear where this is supposed to be done.
+	     See dep_cost_1.  */
+	  gcc_checking_assert (INSN_CODE (insn) >= 0
+			       || recog_memoized (insn) < 0);
+	  if (INSN_CODE (insn) < 0)
 	    {
-	      rtx x;
-	      ds_t s;
+	      /* Non-recognized insns at position 0 are handled above.  */
+	      gcc_assert (i > 0);
+	      ready_try[i] = 1;
+	      continue;
+	    }
 
-	      x = ready_element (ready, i);
-	      s = TODO_SPEC (x);
+	  if (targetm.sched.first_cycle_multipass_dfa_lookahead_guard)
+	    {
+	      ready_try[i]
+		= (targetm.sched.first_cycle_multipass_dfa_lookahead_guard
+		    (insn, i));
 
-	      if (spec_info->flags & PREFER_NON_DATA_SPEC
-		  && !(s & DATA_SPEC))
+	      if (ready_try[i] < 0)
+		/* Queue instruction for several cycles.
+		   We need to restart choose_ready as we have changed
+		   the ready list.  */
 		{
-		  try_data = 0;
-		  if (!(spec_info->flags & PREFER_NON_CONTROL_SPEC)
-		      || !try_control)
-		    break;
+		  change_queue_index (insn, -ready_try[i]);
+		  return 1;
 		}
 
-	      if (spec_info->flags & PREFER_NON_CONTROL_SPEC
-		  && !(s & CONTROL_SPEC))
-		{
-		  try_control = 0;
-		  if (!(spec_info->flags & PREFER_NON_DATA_SPEC) || !try_data)
-		    break;
-		}
+	      /* Make sure that we didn't end up with 0'th insn filtered out.
+		 Don't be tempted to make life easier for backends and just
+		 requeue 0'th insn if (ready_try[0] == 0) and restart
+		 choose_ready.  Backends should be very considerate about
+		 requeueing instructions -- especially the highest priority
+		 one at position 0.  */
+	      gcc_assert (ready_try[i] == 0 || i > 0);
+	      if (ready_try[i])
+		continue;
 	    }
-	}
 
-      ts = TODO_SPEC (insn);
-      if ((ts & SPECULATIVE)
-	  && (((!try_data && (ts & DATA_SPEC))
-	       || (!try_control && (ts & CONTROL_SPEC)))
-	      || (targetm.sched.first_cycle_multipass_dfa_lookahead_guard_spec
-		  && !targetm.sched
-		  .first_cycle_multipass_dfa_lookahead_guard_spec (insn))))
-	/* Discard speculative instruction that stands first in the ready
-	   list.  */
-	{
-	  change_queue_index (insn, 1);
-	  return 1;
+	  gcc_assert (ready_try[i] == 0);
+	  /* INSN made it through the scrutiny of filters!  */
 	}
 
-      ready_try[0] = 0;
-
-      for (i = 1; i < ready->n_ready; i++)
-	{
-	  insn = ready_element (ready, i);
-
-	  ready_try [i]
-	    = ((!try_data && (TODO_SPEC (insn) & DATA_SPEC))
-               || (!try_control && (TODO_SPEC (insn) & CONTROL_SPEC)));
-	}
-
-      /* Let the target filter the search space.  */
-      for (i = 1; i < ready->n_ready; i++)
-	if (!ready_try[i])
-	  {
-	    insn = ready_element (ready, i);
-
-	    /* If this insn is recognizable we should have already
-	       recognized it earlier.
-	       ??? Not very clear where this is supposed to be done.
-	       See dep_cost_1.  */
-	    gcc_checking_assert (INSN_CODE (insn) >= 0
-				 || recog_memoized (insn) < 0);
-
-	    ready_try [i]
-	      = (/* INSN_CODE check can be omitted here as it is also done later
-		    in max_issue ().  */
-		 INSN_CODE (insn) < 0
-		 || (targetm.sched.first_cycle_multipass_dfa_lookahead_guard
-		     && !targetm.sched.first_cycle_multipass_dfa_lookahead_guard
-		     (insn)));
-	  }
-
       if (max_issue (ready, 1, curr_state, first_cycle_insn_p, &index) == 0)
 	{
 	  *insn_ptr = ready_remove_first (ready);
@@ -5865,6 +6284,35 @@
   return earliest_fail;
 }
 
+/* Print instructions together with useful scheduling information between
+   HEAD and TAIL (inclusive).  */
+static void
+dump_insn_stream (rtx head, rtx tail)
+{
+  fprintf (sched_dump, ";;\t| insn | prio |\n");
+
+  rtx next_tail = NEXT_INSN (tail);
+  for (rtx insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    {
+      int priority = NOTE_P (insn) ? 0 : INSN_PRIORITY (insn);
+      const char *pattern = (NOTE_P (insn)
+			     ? "note"
+			     : str_pattern_slim (PATTERN (insn)));
+
+      fprintf (sched_dump, ";;\t| %4d | %4d | %-30s ",
+	       INSN_UID (insn), priority, pattern);
+
+      if (sched_verbose >= 4)
+	{
+	  if (NOTE_P (insn) || recog_memoized (insn) < 0)
+	    fprintf (sched_dump, "nothing");
+	  else
+	    print_reservation (sched_dump, insn);
+	}
+      fprintf (sched_dump, "\n");
+    }
+}
+
 /* Use forward list scheduling to rearrange insns of block pointed to by
    TARGET_BB, possibly bringing insns from subsequent blocks in the same
    region.  */
@@ -5903,8 +6351,17 @@
 
   /* Debug info.  */
   if (sched_verbose)
-    dump_new_block_header (0, *target_bb, head, tail);
+    {
+      dump_new_block_header (0, *target_bb, head, tail);
 
+      if (sched_verbose >= 2)
+	{
+	  dump_insn_stream (head, tail);
+	  memset (&rank_for_schedule_stats, 0,
+		  sizeof (rank_for_schedule_stats));
+	}
+    }
+
   if (init_state == NULL)
     state_reset (curr_state);
   else
@@ -5922,8 +6379,9 @@
     targetm.sched.init (sched_dump, sched_verbose, ready.veclen);
 
   /* We start inserting insns after PREV_HEAD.  */
-  last_scheduled_insn = nonscheduled_insns_begin = prev_head;
+  last_scheduled_insn = prev_head;
   last_nondebug_scheduled_insn = NULL_RTX;
+  nonscheduled_insns_begin = NULL_RTX;
 
   gcc_assert ((NOTE_P (last_scheduled_insn)
 	       || DEBUG_INSN_P (last_scheduled_insn))
@@ -5944,8 +6402,8 @@
      in try_ready () (which is called through init_ready_list ()).  */
   (*current_sched_info->init_ready_list) ();
 
-  if (sched_pressure == SCHED_PRESSURE_MODEL)
-    model_start_schedule ();
+  if (sched_pressure)
+    sched_pressure_start_bb (*target_bb);
 
   /* The algorithm is O(n^2) in the number of ready insns at any given
      time in the worst case.  Before reload we are more likely to have
@@ -5953,7 +6411,8 @@
   if (!reload_completed
       && ready.n_ready - ready.n_debug > MAX_SCHED_READY_INSNS)
     {
-      ready_sort (&ready);
+      ready_sort_debug (&ready);
+      ready_sort_real (&ready);
 
       /* Find first free-standing insn past MAX_SCHED_READY_INSNS.
          If there are debug insns, we know they're first.  */
@@ -5964,7 +6423,8 @@
       if (sched_verbose >= 2)
 	{
 	  fprintf (sched_dump,
-		   ";;\t\tReady list on entry: %d insns\n", ready.n_ready);
+		   ";;\t\tReady list on entry: %d insns:  ", ready.n_ready);
+	  debug_ready_list (&ready);
 	  fprintf (sched_dump,
 		   ";;\t\t before reload => truncated to %d insns\n", i);
 	}
@@ -5976,7 +6436,7 @@
 	rtx skip_insn;
 
 	if (dbg_cnt (sched_insn) == false)
-	  skip_insn = next_nonnote_insn (nonscheduled_insns_begin);
+	  skip_insn = first_nonscheduled_insn ();
 	else
 	  skip_insn = NULL_RTX;
 
@@ -6031,7 +6491,7 @@
 
 	  if (sched_verbose >= 2)
 	    {
-	      fprintf (sched_dump, ";;\t\tReady list after queue_to_ready:  ");
+	      fprintf (sched_dump, ";;\t\tReady list after queue_to_ready:");
 	      debug_ready_list (&ready);
 	    }
 	  advance -= clock_var - start_clock_var;
@@ -6097,7 +6557,8 @@
 
 	      if (sched_verbose >= 2)
 		{
-		  fprintf (sched_dump, ";;\t\tReady list after ready_sort:  ");
+		  fprintf (sched_dump,
+			   ";;\t\tReady list after ready_sort:    ");
 		  debug_ready_list (&ready);
 		}
 	    }
@@ -6488,14 +6949,25 @@
       sched_extend_luids ();
     }
 
-  if (sched_verbose)
-    fprintf (sched_dump, ";;   new head = %d\n;;   new tail = %d\n\n",
-	     INSN_UID (head), INSN_UID (tail));
-
   /* Update head/tail boundaries.  */
   head = NEXT_INSN (prev_head);
   tail = last_scheduled_insn;
 
+  if (sched_verbose)
+    {
+      fprintf (sched_dump, ";;   new head = %d\n;;   new tail = %d\n",
+	       INSN_UID (head), INSN_UID (tail));
+
+      if (sched_verbose >= 2)
+	{
+	  dump_insn_stream (head, tail);
+	  print_rank_for_schedule_stats (";; TOTAL ", &rank_for_schedule_stats,
+					 NULL);
+	}
+
+      fprintf (sched_dump, "\n");
+    }
+
   head = restore_other_notes (head, NULL);
 
   current_sched_info->head = head;
@@ -6581,6 +7053,19 @@
 	  saved_reg_live = BITMAP_ALLOC (NULL);
 	  region_ref_regs = BITMAP_ALLOC (NULL);
 	}
+
+      /* Calculate number of CALL_USED_REGS in register classes that
+	 we calculate register pressure for.  */
+      for (int c = 0; c < ira_pressure_classes_num; ++c)
+	{
+	  enum reg_class cl = ira_pressure_classes[c];
+
+	  call_used_regs_num[cl] = 0;
+
+	  for (int i = 0; i < ira_class_hard_regs_num[cl]; ++i)
+	    if (call_used_regs[ira_class_hard_regs[cl][i]])
+	      ++call_used_regs_num[cl];
+	}
     }
 }
 
@@ -6660,18 +7145,17 @@
   else
     issue_rate = 1;
 
-  if (cached_issue_rate != issue_rate)
-    {
-      cached_issue_rate = issue_rate;
-      /* To invalidate max_lookahead_tries:  */
-      cached_first_cycle_multipass_dfa_lookahead = 0;
-    }
-
-  if (targetm.sched.first_cycle_multipass_dfa_lookahead)
+  if (targetm.sched.first_cycle_multipass_dfa_lookahead
+      /* Don't use max_issue with reg_pressure scheduling.  Multipass
+	 scheduling and reg_pressure scheduling undo each other's decisions.  */
+      && sched_pressure == SCHED_PRESSURE_NONE)
     dfa_lookahead = targetm.sched.first_cycle_multipass_dfa_lookahead ();
   else
     dfa_lookahead = 0;
 
+  /* Set to "0" so that we recalculate.  */
+  max_lookahead_tries = 0;
+
   if (targetm.sched.init_dfa_pre_cycle_insn)
     targetm.sched.init_dfa_pre_cycle_insn ();
 
@@ -7155,8 +7639,9 @@
 
   gcc_assert (new_sched_ready_n_insns >= sched_ready_n_insns);
 
-  ready_try = (char *) xrecalloc (ready_try, new_sched_ready_n_insns,
-                                  sched_ready_n_insns, sizeof (*ready_try));
+  ready_try = (signed char *) xrecalloc (ready_try, new_sched_ready_n_insns,
+					 sched_ready_n_insns,
+					 sizeof (*ready_try));
 
   /* We allocate +1 element to save initial state in the choice_stack[0]
      entry.  */
@@ -8429,6 +8914,10 @@
       INSN_EXACT_TICK (insn) = INVALID_TICK;
       INTER_TICK (insn) = INVALID_TICK;
       TODO_SPEC (insn) = HARD_DEP;
+      INSN_AUTOPREF_MULTIPASS_DATA (insn)[0].status
+	= AUTOPREF_MULTIPASS_DATA_UNINITIALIZED;
+      INSN_AUTOPREF_MULTIPASS_DATA (insn)[1].status
+	= AUTOPREF_MULTIPASS_DATA_UNINITIALIZED;
     }
 }
 
@@ -8534,7 +9023,7 @@
 rtx
 sched_emit_insn (rtx pat)
 {
-  rtx insn = emit_insn_before (pat, nonscheduled_insns_begin);
+  rtx insn = emit_insn_before (pat, first_nonscheduled_insn ());
   haifa_init_insn (insn);
 
   if (current_sched_info->add_remove_insn)
--- a/src/gcc/compare-elim.c
+++ b/src/gcc/compare-elim.c
@@ -100,6 +100,9 @@
      constants.  */
   rtx in_a, in_b;
 
+  /* The REG_EH_REGION of the comparison.  */
+  rtx eh_note;
+
   /* Information about how this comparison is used.  */
   struct comparison_use uses[MAX_CMP_USE];
 
@@ -262,6 +265,7 @@
   struct comparison *last_cmp;
   rtx insn, next, last_clobber;
   bool last_cmp_valid;
+  bool need_purge = false;
   bitmap killed;
 
   killed = BITMAP_ALLOC (NULL);
@@ -303,44 +307,60 @@
       if (src)
 	{
 	  enum machine_mode src_mode = GET_MODE (src);
+	  rtx eh_note = NULL;
 
-	  /* Eliminate a compare that's redundant with the previous.  */
-	  if (last_cmp_valid
-	      && rtx_equal_p (last_cmp->in_a, XEXP (src, 0))
-	      && rtx_equal_p (last_cmp->in_b, XEXP (src, 1)))
-	    {
-	      rtx flags, x;
-	      enum machine_mode new_mode
-		= targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode);
+	  if (flag_non_call_exceptions)
+	    eh_note = find_reg_note (insn, REG_EH_REGION, NULL);
 
-	      /* New mode is incompatible with the previous compare mode.  */
-	      if (new_mode == VOIDmode)
-		continue;
+	  if (!last_cmp_valid)
+	    goto dont_delete;
 
-	      if (new_mode != last_cmp->orig_mode)
-		{
-		  flags = gen_rtx_REG (src_mode, targetm.flags_regnum);
+	  /* Take care that it's in the same EH region.  */
+	  if (flag_non_call_exceptions
+	      && !rtx_equal_p (eh_note, last_cmp->eh_note))
+	    goto dont_delete;
 
-		  /* Generate new comparison for substitution.  */
-		  x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1));
-		  x = gen_rtx_SET (VOIDmode, flags, x);
+	  /* Make sure the compare is redundant with the previous.  */
+	  if (!rtx_equal_p (last_cmp->in_a, XEXP (src, 0))
+	      || !rtx_equal_p (last_cmp->in_b, XEXP (src, 1)))
+	    goto dont_delete;
 
-		  if (!validate_change (last_cmp->insn,
-					&PATTERN (last_cmp->insn), x, false))
-		    continue;
+	  /* New mode must be compatible with the previous compare mode.  */
+	  {
+	    enum machine_mode new_mode
+	      = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode);
+	    if (new_mode == VOIDmode)
+	      goto dont_delete;
 
-		  last_cmp->orig_mode = new_mode;
-		}
+	    if (new_mode != last_cmp->orig_mode)
+	      {
+		rtx x, flags = gen_rtx_REG (src_mode, targetm.flags_regnum);
 
-	      delete_insn (insn);
-	      continue;
-	    }
+		/* Generate new comparison for substitution.  */
+		x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1));
+		x = gen_rtx_SET (VOIDmode, flags, x);
 
+		if (!validate_change (last_cmp->insn,
+				      &PATTERN (last_cmp->insn), x, false))
+		  goto dont_delete;
+
+		last_cmp->orig_mode = new_mode;
+	      }
+	  }
+
+	  /* All tests and substitutions succeeded!  */
+	  if (eh_note)
+	    need_purge = true;
+	  delete_insn (insn);
+	  continue;
+
+	dont_delete:
 	  last_cmp = XCNEW (struct comparison);
 	  last_cmp->insn = insn;
 	  last_cmp->prev_clobber = last_clobber;
 	  last_cmp->in_a = XEXP (src, 0);
 	  last_cmp->in_b = XEXP (src, 1);
+	  last_cmp->eh_note = eh_note;
 	  last_cmp->orig_mode = src_mode;
 	  all_compares.safe_push (last_cmp);
 
@@ -404,6 +424,11 @@
 	    }
 	}
     }
+
+  /* If we deleted a compare with a REG_EH_REGION note, we may need to
+     remove EH edges.  */
+  if (need_purge)
+    purge_dead_edges (bb);
 }
 
 /* Find all comparisons in the function.  */
--- a/src/gcc/tree-ssa-loop-ivopts.c
+++ b/src/gcc/tree-ssa-loop-ivopts.c
@@ -5863,6 +5863,108 @@
   return best_cost;
 }
 
+/* Check if CAND_IDX is a candidate other than OLD_CAND and has
+   cheaper local cost for USE than BEST_CP.  Return pointer to
+   the corresponding cost_pair, otherwise just return BEST_CP.  */
+
+static struct cost_pair*
+cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
+			unsigned int cand_idx, struct iv_cand *old_cand,
+			struct cost_pair *best_cp)
+{
+  struct iv_cand *cand;
+  struct cost_pair *cp;
+
+  gcc_assert (old_cand != NULL && best_cp != NULL);
+  if (cand_idx == old_cand->id)
+    return best_cp;
+
+  cand = iv_cand (data, cand_idx);
+  cp = get_use_iv_cost (data, use, cand);
+  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
+    return cp;
+
+  return best_cp;
+}
+
+/* Try breaking local optimal fixed-point for IVS by replacing candidates
+   which are used by more than one iv uses.  For each of those candidates,
+   this function tries to represent iv uses under that candidate using
+   other ones with lower local cost, then tries to prune the new set.
+   If the new set has lower cost, It returns the new cost after recording
+   candidate replacement in list DELTA.  */
+
+static comp_cost
+iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
+	       struct iv_ca_delta **delta)
+{
+  bitmap_iterator bi, bj;
+  unsigned int i, j, k;
+  struct iv_use *use;
+  struct iv_cand *cand;
+  comp_cost orig_cost, acost;
+  struct iv_ca_delta *act_delta, *tmp_delta;
+  struct cost_pair *old_cp, *best_cp = NULL;
+
+  *delta = NULL;
+  orig_cost = iv_ca_cost (ivs);
+
+  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
+    {
+      if (ivs->n_cand_uses[i] == 1
+	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
+	continue;
+
+      cand = iv_cand (data, i);
+  
+      act_delta = NULL;
+      /*  Represent uses under current candidate using other ones with
+	  lower local cost.  */
+      for (j = 0; j < ivs->upto; j++)
+	{
+	  use = iv_use (data, j);
+	  old_cp = iv_ca_cand_for_use (ivs, use);
+
+	  if (old_cp->cand != cand)
+	    continue;
+
+	  best_cp = old_cp;
+	  if (data->consider_all_candidates)
+	    for (k = 0; k < n_iv_cands (data); k++)
+	      best_cp = cheaper_cost_with_cand (data, use, k,
+						old_cp->cand, best_cp);
+	  else
+	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
+	      best_cp = cheaper_cost_with_cand (data, use, k,
+						old_cp->cand, best_cp);
+
+	  if (best_cp == old_cp)
+	    continue;
+
+	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
+	}
+      /* No need for further prune.  */
+      if (!act_delta)
+	continue;
+
+      /* Prune the new candidate set.  */
+      iv_ca_delta_commit (data, ivs, act_delta, true);
+      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
+      iv_ca_delta_commit (data, ivs, act_delta, false);
+      act_delta = iv_ca_delta_join (act_delta, tmp_delta);
+
+      if (compare_costs (acost, orig_cost) < 0)
+	{
+	  *delta = act_delta;
+	  return acost;
+	}
+      else
+	iv_ca_delta_free (&act_delta);
+    }
+
+  return orig_cost;
+}
+
 /* Tries to extend the sets IVS in the best possible way in order
    to express the USE.  If ORIGINALP is true, prefer candidates from
    the original set of IVs, otherwise favor important candidates not
@@ -6005,10 +6107,13 @@
   return ivs;
 }
 
-/* Tries to improve set of induction variables IVS.  */
+/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
+   points to a bool variable, this function tries to break local
+   optimal fixed-point by replacing candidates in IVS if it's true.  */
 
 static bool
-try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
+try_improve_iv_set (struct ivopts_data *data,
+		    struct iv_ca *ivs, bool *try_replace_p)
 {
   unsigned i, n_ivs;
   comp_cost acost, best_cost = iv_ca_cost (ivs);
@@ -6052,7 +6157,20 @@
       /* Try removing the candidates from the set instead.  */
       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
 
-      /* Nothing more we can do.  */
+      if (!best_delta && *try_replace_p)
+	{
+	  *try_replace_p = false;
+	  /* So far candidate selecting algorithm tends to choose fewer IVs
+	     so that it can handle cases in which loops have many variables
+	     but the best choice is often to use only one general biv.  One
+	     weakness is it can't handle opposite cases, in which different
+	     candidates should be chosen with respect to each use.  To solve
+	     the problem, we replace candidates in a manner described by the
+	     comments of iv_ca_replace, thus give general algorithm a chance
+	     to break local optimal fixed-point in these cases.  */
+	  best_cost = iv_ca_replace (data, ivs, &best_delta);
+	}
+
       if (!best_delta)
 	return false;
     }
@@ -6071,6 +6189,7 @@
 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
 {
   struct iv_ca *set;
+  bool try_replace_p = true;
 
   /* Get the initial solution.  */
   set = get_initial_solution (data, originalp);
@@ -6087,7 +6206,7 @@
       iv_ca_dump (data, dump_file, set);
     }
 
-  while (try_improve_iv_set (data, set))
+  while (try_improve_iv_set (data, set, &try_replace_p))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
--- a/src/gcc/ira-int.h
+++ b/src/gcc/ira-int.h
@@ -316,7 +316,7 @@
      number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
      reload (at this point pseudo-register has only one allocno) which
      did not get stack slot yet.  */
-  int hard_regno : 16;
+  signed int hard_regno : 16;
   /* Allocnos with the same regno are linked by the following member.
      Allocnos corresponding to inner loops are first in the list (it
      corresponds to depth-first traverse of the loops).  */
--- a/src/gcc/ira-color.c
+++ b/src/gcc/ira-color.c
@@ -1711,6 +1711,7 @@
         {
 	  ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
 	  enum reg_class conflict_aclass;
+	  allocno_color_data_t data = ALLOCNO_COLOR_DATA (conflict_a);
 
 	  /* Reload can give another class so we need to check all
 	     allocnos.  */
@@ -1782,7 +1783,12 @@
 		    hard_regno = ira_class_hard_regs[aclass][j];
 		    ira_assert (hard_regno >= 0);
 		    k = ira_class_hard_reg_index[conflict_aclass][hard_regno];
-		    if (k < 0)
+		    if (k < 0
+			   /* If HARD_REGNO is not available for CONFLICT_A,
+			      the conflict would be ignored, since HARD_REGNO
+			      will never be assigned to CONFLICT_A.  */
+			|| !TEST_HARD_REG_BIT (data->profitable_hard_regs,
+					       hard_regno))
 		      continue;
 		    full_costs[j] -= conflict_costs[k];
 		  }
--- a/src/gcc/sel-sched.c
+++ b/src/gcc/sel-sched.c
@@ -3502,8 +3502,6 @@
 static void
 process_spec_exprs (av_set_t *av_ptr)
 {
-  bool try_data_p = true;
-  bool try_control_p = true;
   expr_t expr;
   av_set_iterator si;
 
@@ -3529,35 +3527,7 @@
           av_set_iter_remove (&si);
           continue;
         }
-
-      if ((spec_info->flags & PREFER_NON_DATA_SPEC)
-          && !(ds & BEGIN_DATA))
-        try_data_p = false;
-
-      if ((spec_info->flags & PREFER_NON_CONTROL_SPEC)
-          && !(ds & BEGIN_CONTROL))
-        try_control_p = false;
     }
-
-  FOR_EACH_EXPR_1 (expr, si, av_ptr)
-    {
-      ds_t ds;
-
-      ds = EXPR_SPEC_DONE_DS (expr);
-
-      if (ds & SPECULATIVE)
-        {
-          if ((ds & BEGIN_DATA) && !try_data_p)
-            /* We don't want any data speculative instructions right
-               now.  */
-            av_set_iter_remove (&si);
-
-          if ((ds & BEGIN_CONTROL) && !try_control_p)
-            /* We don't want any control speculative instructions right
-               now.  */
-            av_set_iter_remove (&si);
-        }
-    }
 }
 
 /* Search for any use-like insns in AV_PTR and decide on scheduling
@@ -4255,7 +4225,7 @@
       if (! have_hook || i == 0)
         r = 0;
       else
-        r = !targetm.sched.first_cycle_multipass_dfa_lookahead_guard (insn);
+        r = targetm.sched.first_cycle_multipass_dfa_lookahead_guard (insn, i);
 
       gcc_assert (INSN_CODE (insn) >= 0);
 
--- a/src/gcc/loop-init.c
+++ b/src/gcc/loop-init.c
@@ -245,6 +245,10 @@
 	}
 
       /* Remove the loop.  */
+      if (loop->header)
+	loop->former_header = loop->header;
+      else
+	gcc_assert (loop->former_header != NULL);
       loop->header = NULL;
       flow_loop_tree_node_remove (loop);
     }
@@ -272,6 +276,33 @@
   FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
     if (loop && loop->header == NULL)
       {
+	if (dump_file
+	    && ((unsigned) loop->former_header->index
+		< basic_block_info_for_fn (cfun)->length ()))
+	  {
+	    basic_block former_header
+	      = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
+	    /* If the old header still exists we want to check if the
+	       original loop is re-discovered or the old header is now
+	       part of a newly discovered loop.
+	       In both cases we should have avoided removing the loop.  */
+	    if (former_header == loop->former_header)
+	      {
+		if (former_header->loop_father->header == former_header)
+		  fprintf (dump_file, "fix_loop_structure: rediscovered "
+			   "removed loop %d as loop %d with old header %d\n",
+			   loop->num, former_header->loop_father->num,
+			   former_header->index);
+		else if ((unsigned) former_header->loop_father->num
+			 >= old_nloops)
+		  fprintf (dump_file, "fix_loop_structure: header %d of "
+			   "removed loop %d is part of the newly "
+			   "discovered loop %d with header %d\n",
+			   former_header->index, loop->num,
+			   former_header->loop_father->num,
+			   former_header->loop_father->header->index);
+	      }
+	  }
 	(*get_loops (cfun))[i] = NULL;
 	flow_loop_free (loop);
       }
--- a/src/gcc/ifcvt.c
+++ b/src/gcc/ifcvt.c
@@ -63,6 +63,10 @@
    + 1)
 #endif
 
+#ifndef HAVE_cbranchcc4
+#define HAVE_cbranchcc4 0
+#endif
+
 #define IFCVT_MULTIPLE_DUMPS 1
 
 #define NULL_BLOCK	((basic_block) NULL)
@@ -1000,6 +1004,18 @@
 		   0, 0, outmode, y);
 }
 
+/* Return the CC reg if it is used in COND.  */
+
+static rtx
+cc_in_cond (rtx cond)
+{
+  if (HAVE_cbranchcc4 && cond
+      && GET_MODE_CLASS (GET_MODE (XEXP (cond, 0))) == MODE_CC)
+    return XEXP (cond, 0);
+
+  return NULL_RTX;
+}
+
 /* Return sequence of instructions generated by if conversion.  This
    function calls end_sequence() to end the current stream, ensures
    that are instructions are unshared, recognizable non-jump insns.
@@ -1010,6 +1026,7 @@
 {
   rtx insn;
   rtx seq = get_insns ();
+  rtx cc = cc_in_cond (if_info->cond);
 
   set_used_flags (if_info->x);
   set_used_flags (if_info->cond);
@@ -1024,7 +1041,9 @@
      allows proper placement of required clobbers.  */
   for (insn = seq; insn; insn = NEXT_INSN (insn))
     if (JUMP_P (insn)
-	|| recog_memoized (insn) == -1)
+	|| recog_memoized (insn) == -1
+	   /* Make sure new generated code does not clobber CC.  */
+	|| (cc && set_of (cc, insn)))
       return NULL_RTX;
 
   return seq;
@@ -1432,10 +1451,16 @@
       end_sequence ();
     }
 
-  /* Don't even try if the comparison operands are weird.  */
+  /* Don't even try if the comparison operands are weird
+     except that the target supports cbranchcc4.  */
   if (! general_operand (cmp_a, GET_MODE (cmp_a))
       || ! general_operand (cmp_b, GET_MODE (cmp_b)))
-    return NULL_RTX;
+    {
+      if (!(HAVE_cbranchcc4)
+	  || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
+	  || cmp_b != const0_rtx)
+	return NULL_RTX;
+    }
 
 #if HAVE_conditional_move
   unsignedp = (code == LTU || code == GEU
@@ -1874,7 +1899,7 @@
     }
 
   cond = canonicalize_condition (if_info->jump, cond, reverse,
-				 earliest, target, false, true);
+				 earliest, target, HAVE_cbranchcc4, true);
   if (! cond || ! reg_mentioned_p (target, cond))
     return NULL;
 
@@ -2361,7 +2386,7 @@
   /* Otherwise, fall back on canonicalize_condition to do the dirty
      work of manipulating MODE_CC values and COMPARE rtx codes.  */
   tmp = canonicalize_condition (jump, cond, reverse, earliest,
-				NULL_RTX, false, true);
+				NULL_RTX, HAVE_cbranchcc4, true);
 
   /* We don't handle side-effects in the condition, like handling
      REG_INC notes and making sure no duplicate conditions are emitted.  */
@@ -2494,6 +2519,7 @@
   rtx insn_a, insn_b;
   rtx set_a, set_b;
   rtx orig_x, x, a, b;
+  rtx cc;
 
   /* We're looking for patterns of the form
 
@@ -2602,6 +2628,13 @@
   if_info->a = a;
   if_info->b = b;
 
+  /* Skip it if the instruction to be moved might clobber CC.  */
+  cc = cc_in_cond (cond);
+  if (cc
+      && (set_of (cc, insn_a)
+	  || (insn_b && set_of (cc, insn_b))))
+    return FALSE;
+
   /* Try optimizations in some approximation of a useful order.  */
   /* ??? Should first look to see if X is live incoming at all.  If it
      isn't, we don't need anything but an unconditional set.  */
@@ -2757,6 +2790,7 @@
 		       rtx cond)
 {
   rtx insn;
+  rtx cc = cc_in_cond (cond);
 
    /* We can only handle simple jumps at the end of the basic block.
       It is almost impossible to update the CFG otherwise.  */
@@ -2815,6 +2849,10 @@
 	  && modified_between_p (src, insn, NEXT_INSN (BB_END (bb))))
 	return FALSE;
 
+      /* Skip it if the instruction to be moved might clobber CC.  */
+      if (cc && set_of (cc, insn))
+	return FALSE;
+
       slot = pointer_map_insert (vals, (void *) dest);
       *slot = (void *) src;
 
--- a/src/gcc/expr.c
+++ b/src/gcc/expr.c
@@ -68,22 +68,6 @@
 #include "tree-ssa-address.h"
 #include "cfgexpand.h"
 
-/* Decide whether a function's arguments should be processed
-   from first to last or from last to first.
-
-   They should if the stack and args grow in opposite directions, but
-   only if we have push insns.  */
-
-#ifdef PUSH_ROUNDING
-
-#ifndef PUSH_ARGS_REVERSED
-#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD)
-#define PUSH_ARGS_REVERSED	/* If it's last to first.  */
-#endif
-#endif
-
-#endif
-
 #ifndef STACK_PUSH_CODE
 #ifdef STACK_GROWS_DOWNWARD
 #define STACK_PUSH_CODE PRE_DEC
@@ -172,37 +156,6 @@
 static rtx const_vector_from_tree (tree);
 static void write_complex_part (rtx, rtx, bool);
 
-/* This macro is used to determine whether move_by_pieces should be called
-   to perform a structure copy.  */
-#ifndef MOVE_BY_PIECES_P
-#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-   < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
-#endif
-
-/* This macro is used to determine whether clear_by_pieces should be
-   called to clear storage.  */
-#ifndef CLEAR_BY_PIECES_P
-#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ()))
-#endif
-
-/* This macro is used to determine whether store_by_pieces should be
-   called to "memset" storage with byte values other than zero.  */
-#ifndef SET_BY_PIECES_P
-#define SET_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ()))
-#endif
-
-/* This macro is used to determine whether store_by_pieces should be
-   called to "memcpy" storage when the source is a constant string.  */
-#ifndef STORE_BY_PIECES_P
-#define STORE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
-#endif
 
 /* This is run to set up which modes can be used
    directly in memory and to initialize the block move optab.  It is run
@@ -843,22 +796,16 @@
   return mode;
 }
 
-/* STORE_MAX_PIECES is the number of bytes at a time that we can
-   store efficiently.  Due to internal GCC limitations, this is
-   MOVE_MAX_PIECES limited by the number of bytes GCC can represent
-   for an immediate constant.  */
-
-#define STORE_MAX_PIECES  MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
-
 /* Determine whether the LEN bytes can be moved by using several move
    instructions.  Return nonzero if a call to move_by_pieces should
    succeed.  */
 
 int
-can_move_by_pieces (unsigned HOST_WIDE_INT len ATTRIBUTE_UNUSED,
-		    unsigned int align ATTRIBUTE_UNUSED)
+can_move_by_pieces (unsigned HOST_WIDE_INT len,
+		    unsigned int align)
 {
-  return MOVE_BY_PIECES_P (len, align);
+  return targetm.use_by_pieces_infrastructure_p (len, align, MOVE_BY_PIECES,
+						 optimize_insn_for_speed_p ());
 }
 
 /* Generate several move instructions to copy LEN bytes from block FROM to
@@ -1195,7 +1142,7 @@
       set_mem_size (y, INTVAL (size));
     }
 
-  if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align))
+  if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align))
     move_by_pieces (x, y, INTVAL (size), align, 0);
   else if (emit_block_move_via_movmem (x, y, size, align,
 				       expected_align, expected_size,
@@ -2396,6 +2343,18 @@
     = gen_rtx_EXPR_LIST (mode, gen_rtx_USE (VOIDmode, reg), *call_fusage);
 }
 
+/* Add a CLOBBER expression for REG to the (possibly empty) list pointed
+   to by CALL_FUSAGE.  REG must denote a hard register.  */
+
+void
+clobber_reg_mode (rtx *call_fusage, rtx reg, enum machine_mode mode)
+{
+  gcc_assert (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER);
+
+  *call_fusage
+    = gen_rtx_EXPR_LIST (mode, gen_rtx_CLOBBER (VOIDmode, reg), *call_fusage);
+}
+
 /* Add USE expressions to *CALL_FUSAGE for each of NREGS consecutive regs,
    starting at REGNO.  All of these registers must be hard registers.  */
 
@@ -2498,9 +2457,11 @@
   if (len == 0)
     return 1;
 
-  if (! (memsetp
-	 ? SET_BY_PIECES_P (len, align)
-	 : STORE_BY_PIECES_P (len, align)))
+  if (!targetm.use_by_pieces_infrastructure_p (len, align,
+					       memsetp
+						 ? SET_BY_PIECES
+						 : STORE_BY_PIECES,
+					       optimize_insn_for_speed_p ()))
     return 0;
 
   align = alignment_for_piecewise_move (STORE_MAX_PIECES, align);
@@ -2576,9 +2537,13 @@
       return to;
     }
 
-  gcc_assert (memsetp
-	      ? SET_BY_PIECES_P (len, align)
-	      : STORE_BY_PIECES_P (len, align));
+  gcc_assert (targetm.use_by_pieces_infrastructure_p
+		(len, align,
+		 memsetp
+		   ? SET_BY_PIECES
+		   : STORE_BY_PIECES,
+		 optimize_insn_for_speed_p ()));
+
   data.constfun = constfun;
   data.constfundata = constfundata;
   data.len = len;
@@ -2815,7 +2780,9 @@
   align = MEM_ALIGN (object);
 
   if (CONST_INT_P (size)
-      && CLEAR_BY_PIECES_P (INTVAL (size), align))
+      && targetm.use_by_pieces_infrastructure_p (INTVAL (size), align,
+						 CLEAR_BY_PIECES,
+						 optimize_insn_for_speed_p ()))
     clear_by_pieces (object, INTVAL (size), align);
   else if (set_storage_via_setmem (object, size, const0_rtx, align,
 				   expected_align, expected_size,
@@ -4221,7 +4188,7 @@
 	  && CONST_INT_P (size)
 	  && skip == 0
 	  && MEM_ALIGN (xinner) >= align
-	  && (MOVE_BY_PIECES_P ((unsigned) INTVAL (size) - used, align))
+	  && can_move_by_pieces ((unsigned) INTVAL (size) - used, align)
 	  /* Here we avoid the case of a structure whose weak alignment
 	     forces many pushes of a small amount of data,
 	     and such small pushes do rounding that causes trouble.  */
@@ -4353,11 +4320,7 @@
       /* Loop over all the words allocated on the stack for this arg.  */
       /* We can do it by words, because any scalar bigger than a word
 	 has a size a multiple of a word.  */
-#ifndef PUSH_ARGS_REVERSED
-      for (i = not_stack; i < size; i++)
-#else
       for (i = size - 1; i >= not_stack; i--)
-#endif
 	if (i >= not_stack + offset)
 	  emit_push_insn (operand_subword_force (x, i, mode),
 			  word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX,
@@ -7839,7 +7802,7 @@
 	    && ! (target != 0 && safe_from_p (target, exp, 1)))
 		  || TREE_ADDRESSABLE (exp)
 		  || (tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))
-		      && (! MOVE_BY_PIECES_P
+		      && (! can_move_by_pieces
 				     (tree_to_uhwi (TYPE_SIZE_UNIT (type)),
 				      TYPE_ALIGN (type)))
 		      && ! mostly_zeros_p (exp))))
@@ -9044,12 +9007,39 @@
       {
         op0 = expand_normal (treeop0);
         this_optab = optab_for_tree_code (code, type, optab_default);
-        temp = expand_unop (mode, this_optab, op0, target, unsignedp);
+        enum machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
+
+	if (optab_handler (this_optab, vec_mode) != CODE_FOR_nothing)
+	  {
+	    struct expand_operand ops[2];
+	    enum insn_code icode = optab_handler (this_optab, vec_mode);
+
+	    create_output_operand (&ops[0], target, mode);
+	    create_input_operand (&ops[1], op0, vec_mode);
+	    if (maybe_expand_insn (icode, 2, ops))
+	      {
+		target = ops[0].value;
+		if (GET_MODE (target) != mode)
+		  return gen_lowpart (tmode, target);
+		return target;
+	      }
+	  }
+	/* Fall back to optab with vector result, and then extract scalar.  */
+	this_optab = scalar_reduc_to_vector (this_optab, type);
+        temp = expand_unop (vec_mode, this_optab, op0, NULL_RTX, unsignedp);
         gcc_assert (temp);
+        /* The tree code produces a scalar result, but (somewhat by convention)
+           the optab produces a vector with the result in element 0 if
+           little-endian, or element N-1 if big-endian.  So pull the scalar
+           result out of that element.  */
+        int index = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (vec_mode) - 1 : 0;
+        int bitsize = GET_MODE_BITSIZE (GET_MODE_INNER (vec_mode));
+        temp = extract_bit_field (temp, bitsize, bitsize * index, unsignedp,
+				  target, mode, mode);
+        gcc_assert (temp);
         return temp;
       }
 
-    case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
       {
 	target = expand_vec_shift_expr (ops, target);
--- a/src/gcc/expr.h
+++ b/src/gcc/expr.h
@@ -346,6 +346,7 @@
 /* Mark REG as holding a parameter for the next CALL_INSN.
    Mode is TYPE_MODE of the non-promoted parameter, or VOIDmode.  */
 extern void use_reg_mode (rtx *, rtx, enum machine_mode);
+extern void clobber_reg_mode (rtx *, rtx, enum machine_mode);
 
 extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
 
@@ -356,6 +357,13 @@
   use_reg_mode (fusage, reg, VOIDmode);
 }
 
+/* Mark REG as clobbered by the call with FUSAGE as CALL_INSN_FUNCTION_USAGE.  */
+static inline void
+clobber_reg (rtx *fusage, rtx reg)
+{
+  clobber_reg_mode (fusage, reg, VOIDmode);
+}
+
 /* Mark NREGS consecutive regs, starting at REGNO, as holding parameters
    for the next CALL_INSN.  */
 extern void use_regs (rtx *, int, int);
--- a/src/gcc/go/ChangeLog.linaro
+++ b/src/gcc/go/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/genattrtab.c
+++ b/src/gcc/genattrtab.c
@@ -4765,6 +4765,7 @@
 
 static struct bypass_list *all_bypasses;
 static size_t n_bypasses;
+static size_t n_bypassed;
 
 static void
 gen_bypass_1 (const char *s, size_t len)
@@ -4810,12 +4811,18 @@
   struct bypass_list *b;
   struct insn_reserv *r;
 
+  n_bypassed = 0;
+
   /* The reservation list is likely to be much longer than the bypass
      list.  */
   for (r = all_insn_reservs; r; r = r->next)
     for (b = all_bypasses; b; b = b->next)
       if (fnmatch (b->pattern, r->name, 0) == 0)
-	r->bypassed = true;
+        {
+          n_bypassed++;
+          r->bypassed = true;
+          break;
+        }
 }
 
 /* Check that attribute NAME is used in define_insn_reservation condition
@@ -5074,7 +5081,7 @@
       process_bypasses ();
 
       byps_exp = rtx_alloc (COND);
-      XVEC (byps_exp, 0) = rtvec_alloc (n_bypasses * 2);
+      XVEC (byps_exp, 0) = rtvec_alloc (n_bypassed * 2);
       XEXP (byps_exp, 1) = make_numeric_value (0);
       for (decl = all_insn_reservs, i = 0;
 	   decl;
--- a/src/gcc/ada/ChangeLog.linaro
+++ b/src/gcc/ada/ChangeLog.linaro
@@ -0,0 +1,119 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-05-13  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209653,209866,209871.
+
+	2014-04-28  Richard Henderson  <rth@redhat.com>
+
+	* gcc-interface/Makefile.in: Support aarch64-linux.
+
+	2014-04-28  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix,
+	add 'Suffix' parameter and adjust comment.
+	(Get_External_Name_With_Suffix): Delete.
+	* exp_dbug.adb (Get_External_Name_With_Suffix): Merge into...
+	(Get_External_Name): ...here.  Add 'False' default to Has_Suffix, add
+	'Suffix' parameter.
+	(Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name.
+	Call Get_External_Name instead of Get_External_Name_With_Suffix.
+	(Get_Secondary_DT_External_Name): Likewise.
+	* exp_cg.adb (Write_Call_Info): Likewise.
+	* exp_disp.adb (Export_DT): Likewise.
+	(Import_DT): Likewise.
+	* comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC
+	parameter with False default.
+	* comperr.adb (Compiler_Abort): Likewise.  Adjust accordingly.
+	* types.h (Fat_Pointer): Rename into...
+	(String_Pointer): ...this.  Add comment on interfacing rules.
+	* fe.h (Compiler_Abort): Adjust for above renaming.
+	(Error_Msg_N): Likewise.
+	(Error_Msg_NE): Likewise.
+	(Get_External_Name): Likewise.  Add third parameter.
+	(Get_External_Name_With_Suffix): Delete.
+	* gcc-interface/decl.c (STDCALL_PREFIX): Define.
+	(create_concat_name): Adjust call to Get_External_Name, remove call to
+	Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming.
+	* gcc-interface/trans.c (post_error): Likewise.
+	(post_error_ne): Likewise.
+	* gcc-interface/misc.c (internal_error_function): Likewise.
+
+	2014-04-22  Richard Henderson  <rth@redhat.com>
+
+	* init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define.
+	(__gnat_alternate_stack): Enable for all linux except ia64.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/common/config/aarch64/aarch64-common.c
+++ b/src/gcc/common/config/aarch64/aarch64-common.c
@@ -44,6 +44,8 @@
   {
     /* Enable section anchors by default at -O1 or higher.  */
     { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
+    /* Enable -fsched-pressure by default when optimizing.  */
+    { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
     /* Enable redundant extension instructions removal at -O2 and higher.  */
     { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
--- a/src/gcc/tree-eh.c
+++ b/src/gcc/tree-eh.c
@@ -4246,10 +4246,9 @@
 	if (current_loops
 	    && e->dest == e->dest->loop_father->header)
 	  {
-	    e->dest->loop_father->header = NULL;
-	    e->dest->loop_father->latch = NULL;
+	    mark_loop_for_removal (e->dest->loop_father);
 	    new_bb->loop_father->latch = NULL;
-	    loops_state_set (LOOPS_NEED_FIXUP|LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
+	    loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
 	  }
 	redirect_eh_edge_1 (e, new_bb, change_region);
 	redirect_edge_succ (e, new_bb);
--- a/src/gcc/fortran/ChangeLog.linaro
+++ b/src/gcc/fortran/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/configure.ac
+++ b/src/gcc/configure.ac
@@ -809,7 +809,7 @@
 )
 AC_SUBST(CONFIGURE_SPECS)
 
-ACX_PKGVERSION([GCC])
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
 ACX_BUGURL([http://gcc.gnu.org/bugs.html])
 
 # Sanity check enable_languages in case someone does not run the toplevel
--- a/src/gcc/tree-vect-loop.c
+++ b/src/gcc/tree-vect-loop.c
@@ -1892,9 +1892,9 @@
 
    Output:
    REDUC_CODE - the corresponding tree-code to be used to reduce the
-      vector of partial results into a single scalar result (which
-      will also reside in a vector) or ERROR_MARK if the operation is
-      a supported reduction operation, but does not have such tree-code.
+      vector of partial results into a single scalar result, or ERROR_MARK
+      if the operation is a supported reduction operation, but does not have
+      such a tree-code.
 
    Return FALSE if CODE currently cannot be vectorized as reduction.  */
 
@@ -4193,6 +4193,7 @@
   if (reduc_code != ERROR_MARK && !slp_reduc)
     {
       tree tmp;
+      tree vec_elem_type;
 
       /*** Case 1:  Create:
            v_out2 = reduc_expr <v_out1>  */
@@ -4201,14 +4202,26 @@
         dump_printf_loc (MSG_NOTE, vect_location,
 			 "Reduce using direct vector reduction.\n");
 
-      vec_dest = vect_create_destination_var (scalar_dest, vectype);
-      tmp = build1 (reduc_code, vectype, new_phi_result);
-      epilog_stmt = gimple_build_assign (vec_dest, tmp);
-      new_temp = make_ssa_name (vec_dest, epilog_stmt);
+      vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
+      if (!useless_type_conversion_p (scalar_type, vec_elem_type))
+	{
+          tree tmp_dest =
+	      vect_create_destination_var (scalar_dest, vec_elem_type);
+	  tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
+	  epilog_stmt = gimple_build_assign (tmp_dest, tmp);
+	  new_temp = make_ssa_name (tmp_dest, epilog_stmt);
+	  gimple_assign_set_lhs (epilog_stmt, new_temp);
+	  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+
+	  tmp = build1 (NOP_EXPR, scalar_type, new_temp);
+	}
+      else
+	tmp = build1 (reduc_code, scalar_type, new_phi_result);
+      epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
+      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
       gimple_assign_set_lhs (epilog_stmt, new_temp);
       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-
-      extract_scalar_result = true;
+      scalar_results.safe_push (new_temp);
     }
   else
     {
@@ -5120,15 +5133,17 @@
 
           epilog_reduc_code = ERROR_MARK;
         }
-
-      if (reduc_optab
-          && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
+      else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
         {
-          if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "reduc op not supported by target.\n");
+          optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
+          if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+            {
+              if (dump_enabled_p ())
+	        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "reduc op not supported by target.\n");
 
-          epilog_reduc_code = ERROR_MARK;
+	      epilog_reduc_code = ERROR_MARK;
+	    }
         }
     }
   else
--- a/src/gcc/calls.c
+++ b/src/gcc/calls.c
@@ -1104,8 +1104,6 @@
 {
   CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far);
   location_t loc = EXPR_LOCATION (exp);
-  /* 1 if scanning parms front to back, -1 if scanning back to front.  */
-  int inc;
 
   /* Count arg position in order args appear.  */
   int argpos;
@@ -1116,22 +1114,9 @@
   args_size->var = 0;
 
   /* In this loop, we consider args in the order they are written.
-     We fill up ARGS from the front or from the back if necessary
-     so that in any case the first arg to be pushed ends up at the front.  */
+     We fill up ARGS from the back.  */
 
-  if (PUSH_ARGS_REVERSED)
-    {
-      i = num_actuals - 1, inc = -1;
-      /* In this case, must reverse order of args
-	 so that we compute and push the last arg first.  */
-    }
-  else
-    {
-      i = 0, inc = 1;
-    }
-
-  /* First fill in the actual arguments in the ARGS array, splitting
-     complex arguments if necessary.  */
+  i = num_actuals - 1;
   {
     int j = i;
     call_expr_arg_iterator iter;
@@ -1140,7 +1125,7 @@
     if (struct_value_addr_value)
       {
 	args[j].tree_value = struct_value_addr_value;
-	j += inc;
+	j--;
       }
     FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
       {
@@ -1152,17 +1137,17 @@
 	  {
 	    tree subtype = TREE_TYPE (argtype);
 	    args[j].tree_value = build1 (REALPART_EXPR, subtype, arg);
-	    j += inc;
+	    j--;
 	    args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg);
 	  }
 	else
 	  args[j].tree_value = arg;
-	j += inc;
+	j--;
       }
   }
 
   /* I counts args in order (to be) pushed; ARGPOS counts in order written.  */
-  for (argpos = 0; argpos < num_actuals; i += inc, argpos++)
+  for (argpos = 0; argpos < num_actuals; i--, argpos++)
     {
       tree type = TREE_TYPE (args[i].tree_value);
       int unsignedp;
@@ -2952,9 +2937,8 @@
 
       compute_argument_addresses (args, argblock, num_actuals);
 
-      /* If we push args individually in reverse order, perform stack alignment
-	 before the first push (the last arg).  */
-      if (PUSH_ARGS_REVERSED && argblock == 0
+      /* Perform stack alignment before the first push (the last arg).  */
+      if (argblock == 0
           && adjusted_args_size.constant > reg_parm_stack_space
 	  && adjusted_args_size.constant != unadjusted_args_size)
 	{
@@ -3097,12 +3081,6 @@
 		sibcall_failure = 1;
 	    }
 
-      /* If we pushed args in forward order, perform stack alignment
-	 after pushing the last arg.  */
-      if (!PUSH_ARGS_REVERSED && argblock == 0)
-	anti_adjust_stack (GEN_INT (adjusted_args_size.constant
-				    - unadjusted_args_size));
-
       /* If register arguments require space on the stack and stack space
 	 was not preallocated, allocate stack space here for arguments
 	 passed in registers.  */
@@ -3152,8 +3130,7 @@
       if (pass == 1 && (return_flags & ERF_RETURNS_ARG))
 	{
 	  int arg_nr = return_flags & ERF_RETURN_ARG_MASK;
-	  if (PUSH_ARGS_REVERSED)
-	    arg_nr = num_actuals - arg_nr - 1;
+	  arg_nr = num_actuals - arg_nr - 1;
 	  if (arg_nr >= 0
 	      && arg_nr < num_actuals
 	      && args[arg_nr].reg
@@ -3597,7 +3574,6 @@
      isn't present here, so we default to native calling abi here.  */
   tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */
   tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */
-  int inc;
   int count;
   rtx argblock = 0;
   CUMULATIVE_ARGS args_so_far_v;
@@ -3946,22 +3922,13 @@
 	argblock = push_block (GEN_INT (args_size.constant), 0, 0);
     }
 
-  /* If we push args individually in reverse order, perform stack alignment
+  /* We push args individually in reverse order, perform stack alignment
      before the first push (the last arg).  */
-  if (argblock == 0 && PUSH_ARGS_REVERSED)
+  if (argblock == 0)
     anti_adjust_stack (GEN_INT (args_size.constant
 				- original_args_size.constant));
 
-  if (PUSH_ARGS_REVERSED)
-    {
-      inc = -1;
-      argnum = nargs - 1;
-    }
-  else
-    {
-      inc = 1;
-      argnum = 0;
-    }
+  argnum = nargs - 1;
 
 #ifdef REG_PARM_STACK_SPACE
   if (ACCUMULATE_OUTGOING_ARGS)
@@ -3978,7 +3945,7 @@
 
   /* ARGNUM indexes the ARGVEC array in the order in which the arguments
      are to be pushed.  */
-  for (count = 0; count < nargs; count++, argnum += inc)
+  for (count = 0; count < nargs; count++, argnum--)
     {
       enum machine_mode mode = argvec[argnum].mode;
       rtx val = argvec[argnum].value;
@@ -4080,17 +4047,8 @@
 	}
     }
 
-  /* If we pushed args in forward order, perform stack alignment
-     after pushing the last arg.  */
-  if (argblock == 0 && !PUSH_ARGS_REVERSED)
-    anti_adjust_stack (GEN_INT (args_size.constant
-				- original_args_size.constant));
+  argnum = nargs - 1;
 
-  if (PUSH_ARGS_REVERSED)
-    argnum = nargs - 1;
-  else
-    argnum = 0;
-
   fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0);
 
   /* Now load any reg parms into their regs.  */
@@ -4097,7 +4055,7 @@
 
   /* ARGNUM indexes the ARGVEC array in the order in which the arguments
      are to be pushed.  */
-  for (count = 0; count < nargs; count++, argnum += inc)
+  for (count = 0; count < nargs; count++, argnum--)
     {
       enum machine_mode mode = argvec[argnum].mode;
       rtx val = argvec[argnum].value;
--- a/src/gcc/tree.def
+++ b/src/gcc/tree.def
@@ -1144,10 +1144,9 @@
    result (e.g. summing the elements of the vector, finding the minimum over
    the vector elements, etc).
    Operand 0 is a vector.
-   The expression returns a vector of the same type, with the first
-   element in the vector holding the result of the reduction of all elements
-   of the operand.  The content of the other elements in the returned vector
-   is undefined.  */
+   The expression returns a scalar, with type the same as the elements of the
+   vector, holding the result of the reduction of all elements of the operand.
+   */
 DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
 DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
 DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
@@ -1210,10 +1209,9 @@
    before adding operand three.  */
 DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
 
-/* Whole vector left/right shift in bits.
+/* Whole vector right shift in bits.
    Operand 0 is a vector to be shifted.
    Operand 1 is an integer shift amount in bits.  */
-DEFTREECODE (VEC_LSHIFT_EXPR, "vec_lshift_expr", tcc_binary, 2)
 DEFTREECODE (VEC_RSHIFT_EXPR, "vec_rshift_expr", tcc_binary, 2)
 
 /* Widening vector multiplication.
--- a/src/gcc/expmed.c
+++ b/src/gcc/expmed.c
@@ -540,6 +540,21 @@
       copy_back = true;
     }
 
+  /* There are similar overflow check at the start of store_bit_field_1,
+     but that only check the situation where the field lies completely
+     outside the register, while there do have situation where the field
+     lies partialy in the register, we need to adjust bitsize for this
+     partial overflow situation.  Without this fix, pr48335-2.c on big-endian
+     will broken on those arch support bit insert instruction, like arm, aarch64
+     etc.  */
+  if (bitsize + bitnum > unit && bitnum < unit)
+    {
+      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
+	       "destination object, data truncated into %wu-bit",
+	       bitsize, unit - bitnum);
+      bitsize = unit - bitnum;
+    }
+
   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
      "backwards" from the size of the unit we are inserting into.
      Otherwise, we count bits from the most significant on a
--- a/src/gcc/except.c
+++ b/src/gcc/except.c
@@ -1394,10 +1394,7 @@
 	      {
 		for (loop = bb->loop_father;
 		     loop_outer (loop); loop = loop_outer (loop))
-		  {
-		    loop->header = NULL;
-		    loop->latch = NULL;
-		  }
+		  mark_loop_for_removal (loop);
 	      }
 	  }
 
--- a/src/gcc/cfgexpand.c
+++ b/src/gcc/cfgexpand.c
@@ -1292,7 +1292,12 @@
   else if (TREE_CODE (var) == VAR_DECL && DECL_HARD_REGISTER (var))
     {
       if (really_expand)
-        expand_one_hard_reg_var (var);
+	{
+	  expand_one_hard_reg_var (var);
+	  if (!DECL_HARD_REGISTER (var))
+	    /* Invalid register specification.  */
+	    expand_one_error_var (var);
+	}
     }
   else if (use_register_for_decl (var))
     {
@@ -4545,7 +4550,6 @@
     case REDUC_MIN_EXPR:
     case REDUC_PLUS_EXPR:
     case VEC_COND_EXPR:
-    case VEC_LSHIFT_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
     case VEC_PACK_SAT_EXPR:
     case VEC_PACK_TRUNC_EXPR:
--- a/src/gcc/gimple-fold.c
+++ b/src/gcc/gimple-fold.c
@@ -374,6 +374,9 @@
       {
         tree rhs = gimple_assign_rhs1 (stmt);
 
+	if (TREE_CLOBBER_P (rhs))
+	  return NULL_TREE;
+
 	if (REFERENCE_CLASS_P (rhs))
 	  return maybe_fold_reference (rhs, false);
 
--- a/src/gcc/simplify-rtx.c
+++ b/src/gcc/simplify-rtx.c
@@ -3348,6 +3348,43 @@
 	  && UINTVAL (trueop0) == GET_MODE_MASK (mode)
 	  && ! side_effects_p (op1))
 	return op0;
+      /* Given:
+	 scalar modes M1, M2
+	 scalar constants c1, c2
+	 size (M2) > size (M1)
+	 c1 == size (M2) - size (M1)
+	 optimize:
+	 (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2) (const_int <c1>))
+				 <low_part>)
+		      (const_int <c2>))
+	 to:
+	 (subreg:M1 (ashiftrt:M2 (reg:M2) (const_int <c1 + c2>))
+		    <low_part>).  */
+      if (code == ASHIFTRT
+	  && !VECTOR_MODE_P (mode)
+	  && SUBREG_P (op0)
+	  && CONST_INT_P (op1)
+	  && GET_CODE (SUBREG_REG (op0)) == LSHIFTRT
+	  && !VECTOR_MODE_P (GET_MODE (SUBREG_REG (op0)))
+	  && CONST_INT_P (XEXP (SUBREG_REG (op0), 1))
+	  && (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+	      > GET_MODE_BITSIZE (mode))
+	  && (INTVAL (XEXP (SUBREG_REG (op0), 1))
+	      == (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+		  - GET_MODE_BITSIZE (mode)))
+	  && subreg_lowpart_p (op0))
+	{
+	  rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
+			     + INTVAL (op1));
+	  machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
+	  tmp = simplify_gen_binary (ASHIFTRT,
+				     GET_MODE (SUBREG_REG (op0)),
+				     XEXP (SUBREG_REG (op0), 0),
+				     tmp);
+	  return simplify_gen_subreg (mode, tmp, inner_mode,
+				      subreg_lowpart_offset (mode,
+							     inner_mode));
+	}
     canonicalize_shift:
       if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
 	{
@@ -4854,6 +4891,32 @@
 				    simplify_gen_binary (XOR, cmp_mode,
 							 XEXP (op0, 1), op1));
 
+  /* (eq/ne (and x y) x) simplifies to (eq/ne (and (not y) x) 0), which
+     can be implemented with a BICS instruction on some targets, or
+     constant-folded if y is a constant.  */
+  if ((code == EQ || code == NE)
+      && op0code == AND
+      && rtx_equal_p (XEXP (op0, 0), op1)
+      && !side_effects_p (op1))
+    {
+      rtx not_y = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 1), cmp_mode);
+      rtx lhs = simplify_gen_binary (AND, cmp_mode, not_y, XEXP (op0, 0));
+
+      return simplify_gen_relational (code, mode, cmp_mode, lhs, const0_rtx);
+    }
+
+  /* Likewise for (eq/ne (and x y) y).  */
+  if ((code == EQ || code == NE)
+      && op0code == AND
+      && rtx_equal_p (XEXP (op0, 1), op1)
+      && !side_effects_p (op1))
+    {
+      rtx not_x = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 0), cmp_mode);
+      rtx lhs = simplify_gen_binary (AND, cmp_mode, not_x, XEXP (op0, 1));
+
+      return simplify_gen_relational (code, mode, cmp_mode, lhs, const0_rtx);
+    }
+
   /* (eq/ne (bswap x) C1) simplifies to (eq/ne x C2) with C2 swapped.  */
   if ((code == EQ || code == NE)
       && GET_CODE (op0) == BSWAP
--- a/src/gcc/explow.c
+++ b/src/gcc/explow.c
@@ -329,11 +329,13 @@
    an address in the address space's address mode, or vice versa (TO_MODE says
    which way).  We take advantage of the fact that pointers are not allowed to
    overflow by commuting arithmetic operations over conversions so that address
-   arithmetic insns can be used.  */
+   arithmetic insns can be used. IN_CONST is true if this conversion is inside
+   a CONST.  */
 
-rtx
-convert_memory_address_addr_space (enum machine_mode to_mode ATTRIBUTE_UNUSED,
-				   rtx x, addr_space_t as ATTRIBUTE_UNUSED)
+static rtx
+convert_memory_address_addr_space_1 (enum machine_mode to_mode ATTRIBUTE_UNUSED,
+				     rtx x, addr_space_t as ATTRIBUTE_UNUSED,
+				     bool in_const)
 {
 #ifndef POINTERS_EXTEND_UNSIGNED
   gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
@@ -389,32 +391,29 @@
 
     case CONST:
       return gen_rtx_CONST (to_mode,
-			    convert_memory_address_addr_space
-			      (to_mode, XEXP (x, 0), as));
+			    convert_memory_address_addr_space_1
+			      (to_mode, XEXP (x, 0), as, true));
       break;
 
     case PLUS:
     case MULT:
-      /* FIXME: For addition, we used to permute the conversion and
-	 addition operation only if one operand is a constant and
-	 converting the constant does not change it or if one operand
-	 is a constant and we are using a ptr_extend instruction
-	 (POINTERS_EXTEND_UNSIGNED < 0) even if the resulting address
-	 may overflow/underflow.  We relax the condition to include
-	 zero-extend (POINTERS_EXTEND_UNSIGNED > 0) since the other
-	 parts of the compiler depend on it.  See PR 49721.
-
+      /* For addition we can safely permute the conversion and addition
+	 operation if one operand is a constant and converting the constant
+	 does not change it or if one operand is a constant and we are
+	 using a ptr_extend instruction  (POINTERS_EXTEND_UNSIGNED < 0).
 	 We can always safely permute them if we are making the address
-	 narrower.  */
+	 narrower. Inside a CONST RTL, this is safe for both pointers
+	 zero or sign extended as pointers cannot wrap. */
       if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode)
 	  || (GET_CODE (x) == PLUS
 	      && CONST_INT_P (XEXP (x, 1))
-	      && (POINTERS_EXTEND_UNSIGNED != 0
-		  || XEXP (x, 1) == convert_memory_address_addr_space
-		  			(to_mode, XEXP (x, 1), as))))
+	      && ((in_const && POINTERS_EXTEND_UNSIGNED != 0)
+		  || XEXP (x, 1) == convert_memory_address_addr_space_1
+				     (to_mode, XEXP (x, 1), as, in_const)
+                  || POINTERS_EXTEND_UNSIGNED < 0)))
 	return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
-			       convert_memory_address_addr_space
-				 (to_mode, XEXP (x, 0), as),
+			       convert_memory_address_addr_space_1
+				 (to_mode, XEXP (x, 0), as, in_const),
 			       XEXP (x, 1));
       break;
 
@@ -426,6 +425,18 @@
 			x, POINTERS_EXTEND_UNSIGNED);
 #endif /* defined(POINTERS_EXTEND_UNSIGNED) */
 }
+
+/* Given X, a memory address in address space AS' pointer mode, convert it to
+   an address in the address space's address mode, or vice versa (TO_MODE says
+   which way).  We take advantage of the fact that pointers are not allowed to
+   overflow by commuting arithmetic operations over conversions so that address
+   arithmetic insns can be used.  */
+
+rtx
+convert_memory_address_addr_space (enum machine_mode to_mode, rtx x, addr_space_t as)
+{
+  return convert_memory_address_addr_space_1 (to_mode, x, as, false);
+}
 
 /* Return something equivalent to X but valid as a memory address for something
    of mode MODE in the named address space AS.  When X is not itself valid,
--- a/src/gcc/lto/ChangeLog.linaro
+++ b/src/gcc/lto/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/po/ChangeLog.linaro
+++ b/src/gcc/po/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/gcc/varasm.c
+++ b/src/gcc/varasm.c
@@ -1335,6 +1335,11 @@
 	  /* As a register variable, it has no section.  */
 	  return;
 	}
+      /* Avoid internal errors from invalid register
+	 specifications.  */
+      SET_DECL_ASSEMBLER_NAME (decl, NULL_TREE);
+      DECL_HARD_REGISTER (decl) = 0;
+      return;
     }
   /* Now handle ordinary static variables and functions (in memory).
      Also handle vars declared register invalidly.  */
--- a/src/gcc/sched-deps.c
+++ b/src/gcc/sched-deps.c
@@ -2489,7 +2489,7 @@
       /* Pending lists can't get larger with a readonly context.  */
       if (!deps->readonly
           && ((deps->pending_read_list_length + deps->pending_write_list_length)
-              > MAX_PENDING_LIST_LENGTH))
+              >= MAX_PENDING_LIST_LENGTH))
 	{
 	  /* Flush all pending reads and writes to prevent the pending lists
 	     from getting any larger.  Insn scheduling runs too slowly when
@@ -2700,7 +2700,7 @@
 	  {
 	    if ((deps->pending_read_list_length
 		 + deps->pending_write_list_length)
-		> MAX_PENDING_LIST_LENGTH
+		>= MAX_PENDING_LIST_LENGTH
 		&& !DEBUG_INSN_P (insn))
 	      flush_pending_lists (deps, insn, true, true);
 	    add_insn_mem_dependence (deps, true, insn, x);
@@ -2828,35 +2828,41 @@
     sched_deps_info->finish_rhs ();
 }
 
-/* Try to group comparison and the following conditional jump INSN if
-   they're already adjacent. This is to prevent scheduler from scheduling
-   them apart.  */
+/* Try to group two fuseable insns together to prevent scheduler
+   from scheduling them apart.  */
 
 static void
-try_group_insn (rtx insn)
+sched_macro_fuse_insns (rtx insn)
 {
-  unsigned int condreg1, condreg2;
-  rtx cc_reg_1;
   rtx prev;
 
-  if (!any_condjump_p (insn))
-    return;
+  if (any_condjump_p (insn))
+    {
+      unsigned int condreg1, condreg2;
+      rtx cc_reg_1;
+      targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+      cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+      prev = prev_nonnote_nondebug_insn (insn);
+      if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
+          || !prev
+          || !modified_in_p (cc_reg_1, prev))
+        return;
+    }
+  else
+    {
+      rtx insn_set = single_set (insn);
 
-  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
-  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
-  prev = prev_nonnote_nondebug_insn (insn);
-  if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
-      || !prev
-      || !modified_in_p (cc_reg_1, prev))
-    return;
+      prev = prev_nonnote_nondebug_insn (insn);
+      if (!prev
+          || !insn_set
+          || !single_set (prev))
+        return;
 
-  /* Different microarchitectures support macro fusions for different
-     combinations of insn pairs.  */
-  if (!targetm.sched.macro_fusion_pair_p
-      || !targetm.sched.macro_fusion_pair_p (prev, insn))
-    return;
+    }
 
-  SCHED_GROUP_P (insn) = 1;
+  if (targetm.sched.macro_fusion_pair_p (prev, insn))
+    SCHED_GROUP_P (insn) = 1;
+
 }
 
 /* Analyze an INSN with pattern X to find all dependencies.  */
@@ -2885,7 +2891,7 @@
   /* Group compare and branch insns for macro-fusion.  */
   if (targetm.sched.macro_fusion_p
       && targetm.sched.macro_fusion_p ())
-    try_group_insn (insn);
+    sched_macro_fuse_insns (insn);
 
   if (may_trap_p (x))
     /* Avoid moving trapping instructions across function calls that might
@@ -3195,8 +3201,8 @@
 	  EXECUTE_IF_SET_IN_REG_SET (reg_pending_clobbers, 0, i, rsi)
 	    {
 	      struct deps_reg *reg_last = &deps->reg_last[i];
-	      if (reg_last->uses_length > MAX_PENDING_LIST_LENGTH
-		  || reg_last->clobbers_length > MAX_PENDING_LIST_LENGTH)
+	      if (reg_last->uses_length >= MAX_PENDING_LIST_LENGTH
+		  || reg_last->clobbers_length >= MAX_PENDING_LIST_LENGTH)
 		{
 		  add_dependence_list_and_free (deps, insn, &reg_last->sets, 0,
 						REG_DEP_OUTPUT, false);
@@ -3629,7 +3635,7 @@
                && sel_insn_is_speculation_check (insn)))
         {
           /* Keep the list a reasonable size.  */
-          if (deps->pending_flush_length++ > MAX_PENDING_LIST_LENGTH)
+          if (deps->pending_flush_length++ >= MAX_PENDING_LIST_LENGTH)
             flush_pending_lists (deps, insn, true, true);
           else
 	    deps->pending_jump_insns
--- a/src/gcc/rtl.h
+++ b/src/gcc/rtl.h
@@ -472,6 +472,9 @@
 /* Predicate yielding nonzero iff X is a data for a jump table.  */
 #define JUMP_TABLE_DATA_P(INSN) (GET_CODE (INSN) == JUMP_TABLE_DATA)
 
+/* Predicate yielding nonzero iff RTX is a subreg.  */
+#define SUBREG_P(RTX) (GET_CODE (RTX) == SUBREG)
+
 /* Predicate yielding nonzero iff X is a return or simple_return.  */
 #define ANY_RETURN_P(X) \
   (GET_CODE (X) == RETURN || GET_CODE (X) == SIMPLE_RETURN)
--- a/src/gcc/tree-inline.c
+++ b/src/gcc/tree-inline.c
@@ -2336,11 +2336,8 @@
 
 	  /* Assign the new loop its header and latch and associate
 	     those with the new loop.  */
-	  if (src_loop->header != NULL)
-	    {
-	      dest_loop->header = (basic_block)src_loop->header->aux;
-	      dest_loop->header->loop_father = dest_loop;
-	    }
+	  dest_loop->header = (basic_block)src_loop->header->aux;
+	  dest_loop->header->loop_father = dest_loop;
 	  if (src_loop->latch != NULL)
 	    {
 	      dest_loop->latch = (basic_block)src_loop->latch->aux;
@@ -3649,7 +3646,6 @@
     case RSHIFT_EXPR:
     case LROTATE_EXPR:
     case RROTATE_EXPR:
-    case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
 
     case BIT_IOR_EXPR:
@@ -5506,6 +5502,11 @@
   delete_unreachable_blocks_update_callgraph (&id);
   if (id.dst_node->definition)
     cgraph_rebuild_references ();
+  if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+    {
+      calculate_dominance_info (CDI_DOMINATORS);
+      fix_loop_structure (NULL);
+    }
   update_ssa (TODO_update_ssa);
 
   /* After partial cloning we need to rescale frequencies, so they are
--- a/src/gcc/sched-int.h
+++ b/src/gcc/sched-int.h
@@ -170,7 +170,7 @@
   int n_debug;
 };
 
-extern char *ready_try;
+extern signed char *ready_try;
 extern struct ready_list ready;
 
 extern int max_issue (struct ready_list *, int, state_t, bool, int *);
@@ -794,6 +794,32 @@
   struct reg_set_data *next_insn_set;
 };
 
+enum autopref_multipass_data_status {
+  /* Entry is irrelevant for auto-prefetcher.  */
+  AUTOPREF_MULTIPASS_DATA_IRRELEVANT = -2,
+  /* Entry is uninitialized.  */
+  AUTOPREF_MULTIPASS_DATA_UNINITIALIZED = -1,
+  /* Entry is relevant for auto-prefetcher and insn can be delayed
+     to allow another insn through.  */
+  AUTOPREF_MULTIPASS_DATA_NORMAL = 0,
+  /* Entry is relevant for auto-prefetcher, but insn should not be
+     delayed as that will break scheduling.  */
+  AUTOPREF_MULTIPASS_DATA_DONT_DELAY = 1
+};
+
+/* Data for modeling cache auto-prefetcher.  */
+struct autopref_multipass_data_
+{
+  /* Base part of memory address.  */
+  rtx base;
+  /* Memory offset.  */
+  int offset;
+  /* Entry status.  */
+  enum autopref_multipass_data_status status;
+};
+typedef struct autopref_multipass_data_ autopref_multipass_data_def;
+typedef autopref_multipass_data_def *autopref_multipass_data_t;
+
 struct _haifa_insn_data
 {
   /* We can't place 'struct _deps_list' into h_i_d instead of deps_list_t
@@ -888,6 +914,16 @@
      pressure excess (between source and target).  */
   int reg_pressure_excess_cost_change;
   int model_index;
+
+  /* Original order of insns in the ready list.  */
+  int rfs_debug_orig_order;
+
+  /* The deciding reason for INSN's place in the ready list.  */
+  int last_rfs_win;
+
+  /* Two entries for cache auto-prefetcher model: one for mem reads,
+     and one for mem writes.  */
+  autopref_multipass_data_def autopref_multipass_data[2];
 };
 
 typedef struct _haifa_insn_data haifa_insn_data_def;
@@ -909,6 +945,8 @@
   (HID (INSN)->reg_pressure_excess_cost_change)
 #define INSN_PRIORITY_STATUS(INSN) (HID (INSN)->priority_status)
 #define INSN_MODEL_INDEX(INSN) (HID (INSN)->model_index)
+#define INSN_AUTOPREF_MULTIPASS_DATA(INSN) \
+  (HID (INSN)->autopref_multipass_data)
 
 typedef struct _haifa_deps_insn_data haifa_deps_insn_data_def;
 typedef haifa_deps_insn_data_def *haifa_deps_insn_data_t;
@@ -1141,9 +1179,7 @@
 
 enum SPEC_SCHED_FLAGS {
   COUNT_SPEC_IN_CRITICAL_PATH = 1,
-  PREFER_NON_DATA_SPEC = COUNT_SPEC_IN_CRITICAL_PATH << 1,
-  PREFER_NON_CONTROL_SPEC = PREFER_NON_DATA_SPEC << 1,
-  SEL_SCHED_SPEC_DONT_CHECK_CONTROL = PREFER_NON_CONTROL_SPEC << 1
+  SEL_SCHED_SPEC_DONT_CHECK_CONTROL = COUNT_SPEC_IN_CRITICAL_PATH << 1
 };
 
 #define NOTE_NOT_BB_P(NOTE) (NOTE_P (NOTE) && (NOTE_KIND (NOTE)	\
@@ -1357,7 +1393,8 @@
 extern int issue_rate;
 extern int dfa_lookahead;
 
-extern void ready_sort (struct ready_list *);
+extern int autopref_multipass_dfa_lookahead_guard (rtx, int);
+
 extern rtx ready_element (struct ready_list *, int);
 extern rtx *ready_lastpos (struct ready_list *);
 
--- a/src/gcc/cfgloop.c
+++ b/src/gcc/cfgloop.c
@@ -1919,3 +1919,16 @@
 {
   return bb->loop_father ? loop_depth (bb->loop_father) : 0;
 }
+
+/* Marks LOOP for removal and sets LOOPS_NEED_FIXUP.  */
+
+void
+mark_loop_for_removal (loop_p loop)
+{
+  if (loop->header == NULL)
+    return;
+  loop->former_header = loop->header;
+  loop->header = NULL;
+  loop->latch = NULL;
+  loops_state_set (LOOPS_NEED_FIXUP);
+}
--- a/src/gcc/system.h
+++ b/src/gcc/system.h
@@ -830,7 +830,8 @@
 	CAN_DEBUG_WITHOUT_FP UNLIKELY_EXECUTED_TEXT_SECTION_NAME	\
 	HOT_TEXT_SECTION_NAME LEGITIMATE_CONSTANT_P ALWAYS_STRIP_DOTDOT	\
 	OUTPUT_ADDR_CONST_EXTRA SMALL_REGISTER_CLASSES ASM_OUTPUT_IDENT	\
-	ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK
+	ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK CLEAR_BY_PIECES_P		\
+ 	MOVE_BY_PIECES_P SET_BY_PIECES_P STORE_BY_PIECES_P
 
 /* Target macros only used for code built for the target, that have
    moved to libgcc-tm.h or have never been present elsewhere.  */
@@ -912,7 +913,8 @@
 	USE_COMMON_FOR_ONE_ONLY IFCVT_EXTRA_FIELDS IFCVT_INIT_EXTRA_FIELDS \
 	CASE_USE_BIT_TESTS FIXUNS_TRUNC_LIKE_FIX_TRUNC                     \
         GO_IF_MODE_DEPENDENT_ADDRESS DELAY_SLOTS_FOR_EPILOGUE              \
-        ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS
+        ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS \
+	LARGEST_EXPONENT_IS_NORNAL ROUND_TOWARDS_ZERO
 
 /* Hooks that are no longer used.  */
  #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE	\
--- a/src/gcc/cfgloop.h
+++ b/src/gcc/cfgloop.h
@@ -192,6 +192,12 @@
 
   /* Number of iteration analysis data for RTL.  */
   struct niter_desc *simple_loop_desc;
+
+  /* For sanity checking during loop fixup we record here the former
+     loop header for loops marked for removal.  Note that this prevents
+     the basic-block from being collected but its index can still be
+     reused.  */
+  basic_block former_header;
 };
 
 /* Flags for state of loop structure.  */
@@ -335,7 +341,9 @@
 extern bool remove_path (edge);
 extern void unloop (struct loop *, bool *, bitmap);
 extern void scale_loop_frequencies (struct loop *, int, int);
+void mark_loop_for_removal (loop_p);
 
+
 /* Induction variable analysis.  */
 
 /* The description of induction variable.  The things are a bit complicated
--- a/src/gcc/tree-vect-generic.c
+++ b/src/gcc/tree-vect-generic.c
@@ -1656,7 +1656,7 @@
   if (compute_type == type)
     return;
 
-  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
+  gcc_assert (code != VEC_RSHIFT_EXPR);
   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
 
   /* Leave expression untouched for later expansion.  */
--- a/src/gcc/config.gcc
+++ b/src/gcc/config.gcc
@@ -312,8 +312,9 @@
 aarch64*-*-*)
 	cpu_type=aarch64
 	need_64bit_hwint=yes
-	extra_headers="arm_neon.h"
+	extra_headers="arm_neon.h arm_acle.h"
 	extra_objs="aarch64-builtins.o aarch-common.o"
+	target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c"
 	target_has_targetm_common=yes
 	;;
 alpha*-*-*)
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
@@ -814,10 +814,12 @@
 DEVPHASE    := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
 DATESTAMP   := $(srcdir)/DATESTAMP # YYYYMMDD or empty
 REVISION    := $(srcdir)/REVISION  # [BRANCH revision XXXXXX]
+LINAROVER   := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev]
 
 BASEVER_c   := $(shell cat $(BASEVER))
 DEVPHASE_c  := $(shell cat $(DEVPHASE))
 DATESTAMP_c := $(shell cat $(DATESTAMP))
+LINAROVER_c := $(shell cat $(LINAROVER))
 
 ifeq (,$(wildcard $(REVISION)))
 REVISION_c  :=
@@ -838,6 +840,7 @@
 DATESTAMP_s := "\"$(if $(DEVPHASE_c), $(DATESTAMP_c))\""
 PKGVERSION_s:= "\"@PKGVERSION@\""
 BUGURL_s    := "\"@REPORT_BUGS_TO@\""
+LINAROVER_s := "\"$(LINAROVER_c)\""
 
 PKGVERSION  := @PKGVERSION@
 BUGURL_TEXI := @REPORT_BUGS_TEXI@
@@ -2542,8 +2545,9 @@
   -DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
   @TARGET_SYSTEM_ROOT_DEFINE@
 
-CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
-cppbuiltin.o: $(BASEVER)
+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \
+	-DLINAROVER=$(LINAROVER_s)
+cppbuiltin.o: $(BASEVER) $(LINAROVER)
 
 CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
 
@@ -2799,8 +2803,7 @@
 	 gcov.texi trouble.texi bugreport.texi service.texi		\
 	 contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi	\
 	 fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi	\
-	 implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi	\
-	 arm-acle-intrinsics.texi
+	 implement-c.texi implement-cxx.texi
 
 # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with
 # the generated tm.texi; the latter might have a more recent timestamp,
--- a/src/gcc/tree-cfg.c
+++ b/src/gcc/tree-cfg.c
@@ -2594,7 +2594,7 @@
    near its "logical" location.  This is of most help to humans looking
    at debugging dumps.  */
 
-static basic_block
+basic_block
 split_edge_bb_loc (edge edge_in)
 {
   basic_block dest = edge_in->dest;
@@ -3517,12 +3517,21 @@
 
         return false;
       }
+    case REDUC_MAX_EXPR:
+    case REDUC_MIN_EXPR:
+    case REDUC_PLUS_EXPR:
+      if (!VECTOR_TYPE_P (rhs1_type)
+	  || !useless_type_conversion_p (lhs_type, TREE_TYPE (rhs1_type)))
+        {
+	  error ("reduction should convert from vector to element type");
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  return true;
+	}
+      return false;
 
     case VEC_UNPACK_HI_EXPR:
     case VEC_UNPACK_LO_EXPR:
-    case REDUC_MAX_EXPR:
-    case REDUC_MIN_EXPR:
-    case REDUC_PLUS_EXPR:
     case VEC_UNPACK_FLOAT_HI_EXPR:
     case VEC_UNPACK_FLOAT_LO_EXPR:
       /* FIXME.  */
@@ -3629,7 +3638,6 @@
 	return false;
       }
 
-    case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
       {
 	if (TREE_CODE (rhs1_type) != VECTOR_TYPE
--- a/src/gcc/tree-cfg.h
+++ b/src/gcc/tree-cfg.h
@@ -62,6 +62,7 @@
 extern tree gimple_block_label (basic_block);
 extern void add_phi_args_after_copy_bb (basic_block);
 extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
+extern basic_block split_edge_bb_loc (edge);
 extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
 					basic_block *, bool);
 extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
--- a/src/gcc/ree.c
+++ b/src/gcc/ree.c
@@ -793,6 +793,14 @@
       if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
 	return false;
 
+      enum machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
+      rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
+
+      /* Ensure the number of hard registers of the copy match.  */
+      if (HARD_REGNO_NREGS (REGNO (src_reg), dst_mode)
+	  != HARD_REGNO_NREGS (REGNO (src_reg), GET_MODE (src_reg)))
+	return false;
+
       /* There's only one reaching def.  */
       rtx def_insn = state->defs_list[0];
 
@@ -842,7 +850,7 @@
       start_sequence ();
       rtx pat = PATTERN (cand->insn);
       rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
-                                 REGNO (XEXP (SET_SRC (pat), 0)));
+                                 REGNO (get_extended_src_reg (SET_SRC (pat))));
       rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
                                  REGNO (SET_DEST (pat)));
       emit_move_insn (new_dst, new_src);
--- a/src/gcc/config/s390/s390.c
+++ b/src/gcc/config/s390/s390.c
@@ -12122,6 +12122,18 @@
   register_pass (&insert_pass_s390_early_mach);
 }
 
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
+
+static bool
+s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+				     unsigned int align ATTRIBUTE_UNUSED,
+				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
+				     bool speed_p ATTRIBUTE_UNUSED)
+{
+  return (size == 1 || size == 2
+	  || size == 4 || (TARGET_ZARCH && size == 8));
+}
+
 /* Initialize GCC target structure.  */
 
 #undef  TARGET_ASM_ALIGNED_HI_OP
@@ -12304,6 +12316,10 @@
 #undef TARGET_SET_UP_BY_PROLOGUE
 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+  s390_use_by_pieces_infrastructure_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-s390.h"
--- a/src/gcc/config/s390/s390.h
+++ b/src/gcc/config/s390/s390.h
@@ -752,24 +752,6 @@
 #define MOVE_MAX_PIECES (TARGET_ZARCH ? 8 : 4)
 #define MAX_MOVE_MAX 16
 
-/* Determine whether to use move_by_pieces or block move insn.  */
-#define MOVE_BY_PIECES_P(SIZE, ALIGN)		\
-  ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4	\
-    || (TARGET_ZARCH && (SIZE) == 8) )
-
-/* Determine whether to use clear_by_pieces or block clear insn.  */
-#define CLEAR_BY_PIECES_P(SIZE, ALIGN)		\
-  ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4	\
-    || (TARGET_ZARCH && (SIZE) == 8) )
-
-/* This macro is used to determine whether store_by_pieces should be
-   called to "memcpy" storage when the source is a constant string.  */
-#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN)
-
-/* Likewise to decide whether to "memset" storage with byte values
-   other than zero.  */
-#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN)
-
 /* Don't perform CSE on function addresses.  */
 #define NO_FUNCTION_CSE
 
--- a/src/gcc/config/i386/i386.c
+++ b/src/gcc/config/i386/i386.c
@@ -25878,6 +25878,9 @@
   rtx compare_set = NULL_RTX, test_if, cond;
   rtx alu_set = NULL_RTX, addr = NULL_RTX;
 
+  if (!any_condjump_p (condjmp))
+    return false;
+
   if (get_attr_type (condgen) != TYPE_TEST
       && get_attr_type (condgen) != TYPE_ICMP
       && get_attr_type (condgen) != TYPE_INCDEC
@@ -26448,7 +26451,7 @@
 static void
 core2i7_first_cycle_multipass_filter_ready_try
 (const_ix86_first_cycle_multipass_data_t data,
- char *ready_try, int n_ready, bool first_cycle_insn_p)
+ signed char *ready_try, int n_ready, bool first_cycle_insn_p)
 {
   while (n_ready--)
     {
@@ -26480,7 +26483,8 @@
 
 /* Prepare for a new round of multipass lookahead scheduling.  */
 static void
-core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
+core2i7_first_cycle_multipass_begin (void *_data,
+				     signed char *ready_try, int n_ready,
 				     bool first_cycle_insn_p)
 {
   ix86_first_cycle_multipass_data_t data
@@ -26501,7 +26505,8 @@
 /* INSN is being issued in current solution.  Account for its impact on
    the decoder model.  */
 static void
-core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
+core2i7_first_cycle_multipass_issue (void *_data,
+				     signed char *ready_try, int n_ready,
 				     rtx insn, const void *_prev_data)
 {
   ix86_first_cycle_multipass_data_t data
@@ -26539,7 +26544,7 @@
 /* Revert the effect on ready_try.  */
 static void
 core2i7_first_cycle_multipass_backtrack (const void *_data,
-					 char *ready_try,
+					 signed char *ready_try,
 					 int n_ready ATTRIBUTE_UNUSED)
 {
   const_ix86_first_cycle_multipass_data_t data
--- a/src/gcc/config/sh/sh.c
+++ b/src/gcc/config/sh/sh.c
@@ -317,6 +317,10 @@
 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
 static int mov_insn_size (enum machine_mode, bool);
 static int mov_insn_alignment_mask (enum machine_mode, bool);
+static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
+					       unsigned int,
+					       enum by_pieces_operation,
+					       bool);
 static bool sequence_insn_p (rtx);
 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
@@ -601,6 +605,10 @@
 #undef TARGET_FIXED_CONDITION_CODE_REGS
 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+  sh_use_by_pieces_infrastructure_p
+
 /* Machine-specific symbol_ref flags.  */
 #define SYMBOL_FLAG_FUNCVEC_FUNCTION	(SYMBOL_FLAG_MACH_DEP << 0)
 
@@ -13394,4 +13402,27 @@
   return NULL_RTX;
 }
 
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
+
+static bool
+sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+				   unsigned int align,
+				   enum by_pieces_operation op,
+				   bool speed_p)
+{
+  switch (op)
+    {
+      case MOVE_BY_PIECES:
+	return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
+	  < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
+      case STORE_BY_PIECES:
+      case SET_BY_PIECES:
+	return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
+	  < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
+      default:
+	return default_use_by_pieces_infrastructure_p (size, align,
+						       op, speed_p);
+    }
+}
+
 #include "gt-sh.h"
--- a/src/gcc/config/sh/sh.h
+++ b/src/gcc/config/sh/sh.h
@@ -1584,16 +1584,6 @@
 #define USE_STORE_PRE_DECREMENT(mode)    ((mode == SImode || mode == DImode) \
 					  ? 0 : TARGET_SH1)
 
-#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-   < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
-
-#define STORE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-   < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
-
-#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
-
 /* Macros to check register numbers against specific register classes.  */
 
 /* These assume that REGNO is a hard or pseudo reg number.
--- a/src/gcc/config/host-linux.c
+++ b/src/gcc/config/host-linux.c
@@ -86,6 +86,8 @@
 # define TRY_EMPTY_VM_SPACE	0x60000000
 #elif defined(__mc68000__)
 # define TRY_EMPTY_VM_SPACE	0x40000000
+#elif defined(__aarch64__) && defined(__ILP32__)
+# define TRY_EMPTY_VM_SPACE	0x60000000
 #elif defined(__aarch64__)
 # define TRY_EMPTY_VM_SPACE	0x1000000000
 #elif defined(__ARM_EABI__)
--- a/src/gcc/config/cris/cris.h
+++ b/src/gcc/config/cris/cris.h
@@ -80,15 +80,7 @@
 /* Which CPU version this is.  The parsed and adjusted cris_cpu_str.  */
 extern int cris_cpu_version;
 
-/* Changing the order used to be necessary to put the fourth __make_dp
-   argument (a DImode parameter) in registers, to fit with the libfunc
-   parameter passing scheme used for intrinsic functions.  FIXME: Check
-   performance.  */
-#ifdef IN_LIBGCC2
-#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c)
-#endif
 
-
 /* Node: Driver */
 
 /* Also provide canonical vN definitions when user specifies an alias.  */
--- a/src/gcc/config/ia64/ia64.c
+++ b/src/gcc/config/ia64/ia64.c
@@ -169,8 +169,7 @@
 static void ia64_dependencies_evaluation_hook (rtx, rtx);
 static void ia64_init_dfa_pre_cycle_insn (void);
 static rtx ia64_dfa_pre_cycle_insn (void);
-static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
-static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
+static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx, int);
 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
 static void ia64_h_i_d_extended (void);
 static void * ia64_alloc_sched_context (void);
@@ -496,10 +495,6 @@
 #undef TARGET_SCHED_GEN_SPEC_CHECK
 #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
 
-#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
-#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
-  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
-
 #undef TARGET_SCHED_SKIP_RTX_P
 #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
 
@@ -7531,32 +7526,30 @@
   return 1;
 }
 
-/* We are choosing insn from the ready queue.  Return nonzero if INSN
+/* We are choosing insn from the ready queue.  Return zero if INSN
    can be chosen.  */
 
 static int
-ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
+ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn, int ready_index)
 {
   gcc_assert (insn && INSN_P (insn));
-  return ((!reload_completed
-	   || !safe_group_barrier_needed (insn))
-	  && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
-	  && (!mflag_sched_mem_insns_hard_limit
-	      || !is_load_p (insn)
-	      || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
-}
 
-/* We are choosing insn from the ready queue.  Return nonzero if INSN
-   can be chosen.  */
+  /* Size of ALAT is 32.  As far as we perform conservative
+     data speculation, we keep ALAT half-empty.  */
+  if ((TODO_SPEC (insn) & BEGIN_DATA) && pending_data_specs >= 16)
+    return ready_index == 0 ? -1 : 1;
 
-static bool
-ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
-{
-  gcc_assert (insn  && INSN_P (insn));
-  /* Size of ALAT is 32.  As far as we perform conservative data speculation,
-     we keep ALAT half-empty.  */
-  return (pending_data_specs < 16
-	  || !(TODO_SPEC (insn) & BEGIN_DATA));
+  if (ready_index == 0)
+    return 0;
+
+  if ((!reload_completed
+       || !safe_group_barrier_needed (insn))
+      && (!mflag_sched_mem_insns_hard_limit
+	  || !is_load_p (insn)
+	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
+    return 0;
+
+  return 1;
 }
 
 /* The following variable value is pseudo-insn used by the DFA insn
@@ -7943,18 +7936,10 @@
 	  
 	  spec_info->flags = 0;
       
-	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
-	    spec_info->flags |= PREFER_NON_DATA_SPEC;
+	  if ((mask & CONTROL_SPEC)
+	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
+	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
 
-	  if (mask & CONTROL_SPEC)
-	    {
-	      if (mflag_sched_prefer_non_control_spec_insns)
-		spec_info->flags |= PREFER_NON_CONTROL_SPEC;
-
-	      if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
-		spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
-	    }
-
 	  if (sched_verbose >= 1)
 	    spec_info->dump = sched_dump;
 	  else
--- a/src/gcc/config/ia64/ia64.opt
+++ b/src/gcc/config/ia64/ia64.opt
@@ -164,12 +164,10 @@
 Use simple data speculation check for control speculation
 
 msched-prefer-non-data-spec-insns
-Target Report Var(mflag_sched_prefer_non_data_spec_insns) Init(0)
-If set, data speculative instructions will be chosen for schedule only if there are no other choices at the moment 
+Target Ignore Warn(switch %qs is no longer supported)
 
 msched-prefer-non-control-spec-insns
-Target Report Var(mflag_sched_prefer_non_control_spec_insns) Init(0)
-If set, control speculative instructions will be chosen for schedule only if there are no other choices at the moment 
+Target Ignore Warn(switch %qs is no longer supported)
 
 msched-count-spec-in-critical-path
 Target Report Var(mflag_sched_count_spec_in_critical_path) Init(0)
--- a/src/gcc/config/aarch64/geniterators.sh
+++ b/src/gcc/config/aarch64/geniterators.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+#
+# Copyright (C) 2014 Free Software Foundation, Inc.
+# Contributed by ARM Ltd.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# Generate aarch64-builtin-iterators.h, a file containing a series of
+# BUILTIN_<ITERATOR> macros, which expand to VAR<N> Macros covering the
+# same set of modes as the iterator in iterators.md
+
+echo "/* -*- buffer-read-only: t -*- */"
+echo "/* Generated automatically by geniterators.sh from iterators.md.  */"
+echo "#ifndef GCC_AARCH64_ITERATORS_H"
+echo "#define GCC_AARCH64_ITERATORS_H"
+
+# Strip newlines, create records marked ITERATOR, and strip junk (anything
+# which does not have a matching brace because it contains characters we
+# don't want to or can't handle (e.g P, PTR iterators change depending on
+# Pmode and ptr_mode).
+cat $1 | tr "\n" " " \
+       | sed 's/(define_mode_iterator \([A-Za-z0-9_]*\) \([]\[A-Z0-9 \t]*\)/\n#define BUILTIN_\1(T, N, MAP) \\ \2\n/g' \
+       | grep '#define [A-Z0-9_(), \\]* \[[A-Z0-9[:space:]]*]' \
+       | sed 's/\t//g' \
+       | sed 's/  \+/ /g' \
+       | sed 's/ \[\([A-Z0-9 ]*\)]/\n\L\1/' \
+       | awk ' BEGIN { FS = " " ; OFS = ", "} \
+	       /#/ { print } \
+               ! /#/ { $1 = $1 ; printf "  VAR%d (T, N, MAP, %s)\n", NF, $0 }'
+
+echo "#endif /* GCC_AARCH64_ITERATORS_H  */"
--- a/src/gcc/config/aarch64/aarch64-simd.md
+++ b/src/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@
 ;; <http://www.gnu.org/licenses/>.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
-	(match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
+  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
+	(match_operand:VALL 1 "general_operand" ""))]
   "TARGET_SIMD"
   "
     if (GET_CODE (operands[0]) == MEM)
@@ -29,8 +29,8 @@
 )
 
 (define_expand "movmisalign<mode>"
-  [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
-        (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
+  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
+        (match_operand:VALL 1 "general_operand" ""))]
   "TARGET_SIMD"
 {
   /* This pattern is not permitted to fail during expansion: if both arguments
@@ -91,9 +91,9 @@
 )
 
 (define_insn "*aarch64_simd_mov<mode>"
-  [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
+  [(set (match_operand:VD 0 "nonimmediate_operand"
 		"=w, m,  w, ?r, ?w, ?r, w")
-	(match_operand:VD 1 "aarch64_simd_general_operand"
+	(match_operand:VD 1 "general_operand"
 		"m,  w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -119,9 +119,9 @@
 )
 
 (define_insn "*aarch64_simd_mov<mode>"
-  [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand"
+  [(set (match_operand:VQ 0 "nonimmediate_operand"
 		"=w, m,  w, ?r, ?w, ?r, w")
-	(match_operand:VQ 1 "aarch64_simd_general_operand"
+	(match_operand:VQ 1 "general_operand"
 		"m,  w,  w,  w,  r,  r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -286,6 +286,37 @@
   [(set_attr "type" "neon_mul_<Vetype><q>")]
 )
 
+(define_insn "bswap<mode>"
+  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
+(define_insn "aarch64_rbit<mode>"
+  [(set (match_operand:VB 0 "register_operand" "=w")
+	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
+		   UNSPEC_RBIT))]
+  "TARGET_SIMD"
+  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
+  [(set_attr "type" "neon_rbit")]
+)
+
+(define_expand "ctz<mode>2"
+  [(set (match_operand:VS 0 "register_operand")
+        (ctz:VS (match_operand:VS 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+     emit_insn (gen_bswap<mode> (operands[0], operands[1]));
+     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
+					     <MODE>mode, 0);
+     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
+     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
+     DONE;
+  }
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
     (mult:VMUL
@@ -692,25 +723,16 @@
    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
   "TARGET_SIMD"
   {
+    /* An arithmetic shift right by 64 fills the result with copies of the sign
+       bit, just like asr by 63 - however the standard pattern does not handle
+       a shift by 64.  */
     if (INTVAL (operands[2]) == 64)
-      emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1]));
-    else
-      emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
+      operands[2] = GEN_INT (63);
+    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
     DONE;
   }
 )
 
-;; SIMD shift by 64.  This pattern is a special case as standard pattern does
-;; not handle NEON shifts by 64.
-(define_insn "aarch64_sshr_simddi"
-  [(set (match_operand:DI 0 "register_operand" "=w")
-        (unspec:DI
-          [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))]
-  "TARGET_SIMD"
-  "sshr\t%d0, %d1, 64"
-  [(set_attr "type" "neon_shift_imm")]
-)
-
 (define_expand "vlshr<mode>3"
  [(match_operand:VQ_S 0 "register_operand" "")
   (match_operand:VQ_S 1 "register_operand" "")
@@ -731,7 +753,7 @@
   "TARGET_SIMD"
   {
     if (INTVAL (operands[2]) == 64)
-      emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1]));
+      emit_move_insn (operands[0], const0_rtx);
     else
       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
     DONE;
@@ -738,17 +760,6 @@
   }
 )
 
-;; SIMD shift by 64.  This pattern is a special case as standard pattern does
-;; not handle NEON shifts by 64.
-(define_insn "aarch64_ushr_simddi"
-  [(set (match_operand:DI 0 "register_operand" "=w")
-        (unspec:DI
-          [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))]
-  "TARGET_SIMD"
-  "ushr\t%d0, %d1, 64"
-  [(set_attr "type" "neon_shift_imm")]
-)
-
 (define_expand "vec_set<mode>"
   [(match_operand:VQ_S 0 "register_operand")
    (match_operand:<VEL> 1 "register_operand")
@@ -989,7 +1000,7 @@
    dup\\t%d0, %1.d[0]
    fmov\\t%d0, %1
    dup\\t%d0, %1"
-  [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>")
+  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
    (set_attr "simd" "yes,*,yes")
    (set_attr "fp" "*,yes,*")
    (set_attr "length" "4")]
@@ -1081,7 +1092,7 @@
 	  (match_operand:<VHALF> 1 "register_operand" "w,r")
           (vec_select:<VHALF>
                 (match_dup 0)
-                (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))]
+                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "@
    ins\\t%0.d[1], %1.d[0]
@@ -1094,7 +1105,7 @@
   (match_operand:<VHALF> 1 "register_operand" "")]
  "TARGET_SIMD"
 {
-  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN);
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
   if (BYTES_BIG_ENDIAN)
     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
 		    operands[1], p));
@@ -1134,7 +1145,7 @@
 ;; For quads.
 
 (define_insn "vec_pack_trunc_<mode>"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w")
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
 	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
 	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
@@ -1576,7 +1587,7 @@
 )
 
 ;; Vector versions of the floating-point frint patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 (define_insn "<frint_pattern><mode>2"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
 	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
@@ -1778,25 +1789,52 @@
 
 ;; 'across lanes' add.
 
-(define_insn "reduc_<sur>plus_<mode>"
+(define_expand "reduc_plus_scal_<mode>"
+  [(match_operand:<VEL> 0 "register_operand" "=w")
+   (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")]
+	       UNSPEC_ADDV)]
+  "TARGET_SIMD"
+  {
+    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+    rtx scratch = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
+    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+    DONE;
+  }
+)
+
+(define_expand "reduc_plus_scal_<mode>"
+  [(match_operand:<VEL> 0 "register_operand" "=w")
+   (match_operand:V2F 1 "register_operand" "w")]
+  "TARGET_SIMD"
+  {
+    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+    rtx scratch = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
+    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
-		    SUADDV))]
+		    UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "type" "neon_reduc_add<q>")]
 )
 
-(define_insn "reduc_<sur>plus_v2si"
+(define_insn "aarch64_reduc_plus_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
-		    SUADDV))]
+		    UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
   [(set_attr "type" "neon_reduc_add")]
 )
 
-(define_insn "reduc_splus_<mode>"
+(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:V2F 0 "register_operand" "=w")
        (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
 		   UNSPEC_FADDV))]
@@ -1814,14 +1852,17 @@
   [(set_attr "type" "neon_fp_reduc_add_s_q")]
 )
 
-(define_expand "reduc_splus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand")
+(define_expand "reduc_plus_scal_v4sf"
+ [(set (match_operand:SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
 		    UNSPEC_FADDV))]
  "TARGET_SIMD"
 {
-  emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1]));
-  emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0]));
+  rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
+  rtx scratch = gen_reg_rtx (V4SFmode);
+  emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
+  emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
+  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
   DONE;
 })
 
@@ -1835,7 +1876,40 @@
 
 ;; 'across lanes' max and min ops.
 
-(define_insn "reduc_<maxmin_uns>_<mode>"
+;; Template for outputting a scalar, so we can create __builtins which can be
+;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+  [(match_operand:<VEL> 0 "register_operand")
+   (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
+		FMAXMINV)]
+  "TARGET_SIMD"
+  {
+    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+    rtx scratch = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
+							      operands[1]));
+    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+    DONE;
+  }
+)
+
+;; Likewise for integer cases, signed and unsigned.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+  [(match_operand:<VEL> 0 "register_operand")
+   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
+		    MAXMINV)]
+  "TARGET_SIMD"
+  {
+    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
+    rtx scratch = gen_reg_rtx (<MODE>mode);
+    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
+							      operands[1]));
+    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
 		    MAXMINV))]
@@ -1844,7 +1918,7 @@
   [(set_attr "type" "neon_reduc_minmax<q>")]
 )
 
-(define_insn "reduc_<maxmin_uns>_v2si"
+(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
 		    MAXMINV))]
@@ -1853,24 +1927,15 @@
   [(set_attr "type" "neon_reduc_minmax")]
 )
 
-(define_insn "reduc_<maxmin_uns>_<mode>"
- [(set (match_operand:V2F 0 "register_operand" "=w")
-       (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
+(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+       (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
 		    FMAXMINV))]
  "TARGET_SIMD"
- "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
+ "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
 )
 
-(define_insn "reduc_<maxmin_uns>_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=w")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
-		    FMAXMINV))]
- "TARGET_SIMD"
- "<maxmin_uns_op>v\\t%s0, %1.4s"
-  [(set_attr "type" "neon_fp_reduc_minmax_s_q")]
-)
-
 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
 ;; allocation.
 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
@@ -1888,15 +1953,15 @@
 ;;     bif op0, op1, mask
 
 (define_insn "aarch64_simd_bsl<mode>_internal"
-  [(set (match_operand:VALLDIF 0 "register_operand"		"=w,w,w")
-	(ior:VALLDIF
-	   (and:VALLDIF
-	     (match_operand:<V_cmp_result> 1 "register_operand"	" 0,w,w")
-	     (match_operand:VALLDIF 2 "register_operand"	" w,w,0"))
-	   (and:VALLDIF
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand"		"=w,w,w")
+	(ior:VSDQ_I_DI
+	   (and:VSDQ_I_DI
 	     (not:<V_cmp_result>
-		(match_dup:<V_cmp_result> 1))
-	     (match_operand:VALLDIF 3 "register_operand"	" w,0,w"))
+	       (match_operand:<V_cmp_result> 1 "register_operand"	" 0,w,w"))
+	     (match_operand:VSDQ_I_DI 3 "register_operand"	" w,0,w"))
+	   (and:VSDQ_I_DI
+	     (match_dup:<V_cmp_result> 1)
+	     (match_operand:VSDQ_I_DI 2 "register_operand"	" w,w,0"))
 	))]
   "TARGET_SIMD"
   "@
@@ -1914,9 +1979,21 @@
  "TARGET_SIMD"
 {
   /* We can't alias operands together if they have different modes.  */
+  rtx tmp = operands[0];
+  if (FLOAT_MODE_P (<MODE>mode))
+    {
+      operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
+      operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
+      tmp = gen_reg_rtx (<V_cmp_result>mode);
+    }
   operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
-  emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
-						  operands[2], operands[3]));
+  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
+							  operands[1],
+							  operands[2],
+							  operands[3]));
+  if (tmp != operands[0])
+    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+
   DONE;
 })
 
@@ -1930,58 +2007,94 @@
 	  (match_operand:VDQ 2 "nonmemory_operand")))]
   "TARGET_SIMD"
 {
-  int inverse = 0, has_zero_imm_form = 0;
   rtx op1 = operands[1];
   rtx op2 = operands[2];
   rtx mask = gen_reg_rtx (<MODE>mode);
+  enum rtx_code code = GET_CODE (operands[3]);
 
-  switch (GET_CODE (operands[3]))
+  /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
+     and desirable for other comparisons if it results in FOO ? -1 : 0
+     (this allows direct use of the comparison result without a bsl).  */
+  if (code == NE
+      || (code != EQ
+	  && op1 == CONST0_RTX (<V_cmp_result>mode)
+	  && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
     {
+      op1 = operands[2];
+      op2 = operands[1];
+      switch (code)
+        {
+        case LE: code = GT; break;
+        case LT: code = GE; break;
+        case GE: code = LT; break;
+        case GT: code = LE; break;
+        /* No case EQ.  */
+        case NE: code = EQ; break;
+        case LTU: code = GEU; break;
+        case LEU: code = GTU; break;
+        case GTU: code = LEU; break;
+        case GEU: code = LTU; break;
+        default: gcc_unreachable ();
+        }
+    }
+
+  /* Make sure we can handle the last operand.  */
+  switch (code)
+    {
+    case NE:
+      /* Normalized to EQ above.  */
+      gcc_unreachable ();
+
     case LE:
     case LT:
-    case NE:
-      inverse = 1;
-      /* Fall through.  */
     case GE:
     case GT:
     case EQ:
-      has_zero_imm_form = 1;
-      break;
-    case LEU:
-    case LTU:
-      inverse = 1;
-      break;
+      /* These instructions have a form taking an immediate zero.  */
+      if (operands[5] == CONST0_RTX (<MODE>mode))
+        break;
+      /* Fall through, as may need to load into register.  */
     default:
+      if (!REG_P (operands[5]))
+        operands[5] = force_reg (<MODE>mode, operands[5]);
       break;
     }
 
-  if (!REG_P (operands[5])
-      && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
-    operands[5] = force_reg (<MODE>mode, operands[5]);
-
-  switch (GET_CODE (operands[3]))
+  switch (code)
     {
     case LT:
+      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
+      break;
+
     case GE:
       emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
       break;
 
     case LE:
+      emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
+      break;
+
     case GT:
       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
       break;
 
     case LTU:
+      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
+      break;
+
     case GEU:
       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
       break;
 
     case LEU:
+      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
+      break;
+
     case GTU:
       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
       break;
 
-    case NE:
+    /* NE has been normalized to EQ above.  */
     case EQ:
       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
       break;
@@ -1990,12 +2103,6 @@
       gcc_unreachable ();
     }
 
-  if (inverse)
-    {
-      op1 = operands[2];
-      op2 = operands[1];
-    }
-
     /* If we have (a = (b CMP c) ? -1 : 0);
        Then we can simply move the generated mask.  */
 
@@ -2383,6 +2490,15 @@
   DONE;
 })
 
+(define_expand "aarch64_reinterpretdf<mode>"
+  [(match_operand:DF 0 "register_operand" "")
+   (match_operand:VD_RE 1 "register_operand" "")]
+  "TARGET_SIMD"
+{
+  aarch64_simd_reinterpret (operands[0], operands[1]);
+  DONE;
+})
+
 (define_expand "aarch64_reinterpretv16qi<mode>"
   [(match_operand:V16QI 0 "register_operand" "")
    (match_operand:VQ 1 "register_operand" "")]
@@ -2769,9 +2885,9 @@
 ;; <su>q<absneg>
 
 (define_insn "aarch64_s<optab><mode>"
-  [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w")
-	(UNQOPS:VSDQ_I_BHSI
-	  (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))]
+  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
+	(UNQOPS:VSDQ_I
+	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_<optab><q>")]
@@ -3470,7 +3586,7 @@
 
 (define_expand "aarch64_sqdmull_laneq<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VD_HSI 1 "register_operand" "w")
+   (match_operand:VSD_HSI 1 "register_operand" "w")
    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_SIMD"
@@ -3679,12 +3795,12 @@
 (define_insn "aarch64_<sur>shll_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(unspec:<VWIDE> [(match_operand:VDW 1 "register_operand" "w")
-			 (match_operand:SI 2 "immediate_operand" "i")]
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                          VSHLL))]
   "TARGET_SIMD"
   "*
   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
   if (INTVAL (operands[2]) == bit_width)
   {
     return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
@@ -3705,7 +3821,6 @@
   "TARGET_SIMD"
   "*
   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
   if (INTVAL (operands[2]) == bit_width)
   {
     return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
@@ -3721,13 +3836,11 @@
 (define_insn "aarch64_<sur>shr_n<mode>"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
-			   (match_operand:SI 2 "immediate_operand" "i")]
+			   (match_operand:SI 2
+			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
 			  VRSHR_N))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
-  return \"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
+  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_sat_shift_imm<q>")]
 )
 
@@ -3737,13 +3850,11 @@
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
 		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
-                       (match_operand:SI 3 "immediate_operand" "i")]
+                       (match_operand:SI 3
+			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                       VSRA))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[3], 1, bit_width + 1);
-  return \"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
+  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
   [(set_attr "type" "neon_shift_acc<q>")]
 )
 
@@ -3753,14 +3864,11 @@
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
 		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
-                       (match_operand:SI 3 "immediate_operand" "i")]
+                       (match_operand:SI 3
+			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                       VSLRI))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[3], 1 - <VSLRI:offsetlr>,
-                             bit_width - <VSLRI:offsetlr> + 1);
-  return \"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
+  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
   [(set_attr "type" "neon_shift_imm<q>")]
 )
 
@@ -3769,13 +3877,11 @@
 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
 	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
-		       (match_operand:SI 2 "immediate_operand" "i")]
+		       (match_operand:SI 2
+			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                       VQSHL_N))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 0, bit_width);
-  return \"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
+  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_sat_shift_imm<q>")]
 )
 
@@ -3785,13 +3891,11 @@
 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
-			    (match_operand:SI 2 "immediate_operand" "i")]
+			    (match_operand:SI 2
+			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
 			   VQSHRN_N))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
-  return \"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2\";"
+  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
@@ -3823,26 +3927,46 @@
 	  )))
      (clobber (reg:CC CC_REGNUM))]
   "TARGET_SIMD"
-  "@
-  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
-  cm<optab>\t%d0, %d1, #0
-  #"
-  "reload_completed
-   /* We need to prevent the split from
-      happening in the 'w' constraint cases.  */
-   && GP_REGNUM_P (REGNO (operands[0]))
-   && GP_REGNUM_P (REGNO (operands[1]))"
-  [(const_int 0)]
+  "#"
+  "reload_completed"
+  [(set (match_operand:DI 0 "register_operand")
+	(neg:DI
+	  (COMPARISONS:DI
+	    (match_operand:DI 1 "register_operand")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
+	  )))]
   {
-    enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
-    rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-    rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-    DONE;
+    /* If we are in the general purpose register file,
+       we split to a sequence of comparison and store.  */
+    if (GP_REGNUM_P (REGNO (operands[0]))
+	&& GP_REGNUM_P (REGNO (operands[1])))
+      {
+	enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+	DONE;
+      }
+    /* Otherwise, we expand to a similar pattern which does not
+       clobber CC_REGNUM.  */
   }
   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
 )
 
+(define_insn "*aarch64_cm<optab>di"
+  [(set (match_operand:DI 0 "register_operand" "=w,w")
+	(neg:DI
+	  (COMPARISONS:DI
+	    (match_operand:DI 1 "register_operand" "w,w")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
+	  )))]
+  "TARGET_SIMD && reload_completed"
+  "@
+  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+  cm<optab>\t%d0, %d1, #0"
+  [(set_attr "type" "neon_compare, neon_compare_zero")]
+)
+
 ;; cm(hs|hi)
 
 (define_insn "aarch64_cm<optab><mode>"
@@ -3866,35 +3990,62 @@
 	  )))
     (clobber (reg:CC CC_REGNUM))]
   "TARGET_SIMD"
-  "@
-  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
-  #"
-  "reload_completed
-   /* We need to prevent the split from
-      happening in the 'w' constraint cases.  */
-   && GP_REGNUM_P (REGNO (operands[0]))
-   && GP_REGNUM_P (REGNO (operands[1]))"
-  [(const_int 0)]
+  "#"
+  "reload_completed"
+  [(set (match_operand:DI 0 "register_operand")
+	(neg:DI
+	  (UCOMPARISONS:DI
+	    (match_operand:DI 1 "register_operand")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
+	  )))]
   {
-    enum machine_mode mode = CCmode;
-    rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-    rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-    DONE;
+    /* If we are in the general purpose register file,
+       we split to a sequence of comparison and store.  */
+    if (GP_REGNUM_P (REGNO (operands[0]))
+	&& GP_REGNUM_P (REGNO (operands[1])))
+      {
+	enum machine_mode mode = CCmode;
+	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+	DONE;
+      }
+    /* Otherwise, we expand to a similar pattern which does not
+       clobber CC_REGNUM.  */
   }
-  [(set_attr "type" "neon_compare, neon_compare_zero")]
+  [(set_attr "type" "neon_compare,multiple")]
 )
 
+(define_insn "*aarch64_cm<optab>di"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+	(neg:DI
+	  (UCOMPARISONS:DI
+	    (match_operand:DI 1 "register_operand" "w")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
+	  )))]
+  "TARGET_SIMD && reload_completed"
+  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
+  [(set_attr "type" "neon_compare")]
+)
+
 ;; cmtst
 
+;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
+;; we don't have any insns using ne, and aarch64_vcond_internal outputs
+;; not (neg (eq (and x y) 0))
+;; which is rewritten by simplify_rtx as
+;; plus (eq (and x y) 0) -1.
+
 (define_insn "aarch64_cmtst<mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
-	(neg:<V_cmp_result>
-	  (ne:<V_cmp_result>
+	(plus:<V_cmp_result>
+	  (eq:<V_cmp_result>
 	    (and:VDQ
 	      (match_operand:VDQ 1 "register_operand" "w")
 	      (match_operand:VDQ 2 "register_operand" "w"))
-	    (vec_duplicate:<V_cmp_result> (const_int 0)))))]
+	    (match_operand:VDQ 3 "aarch64_simd_imm_zero"))
+	  (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
+  ]
   "TARGET_SIMD"
   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
   [(set_attr "type" "neon_tst<q>")]
@@ -3910,23 +4061,44 @@
 	    (const_int 0))))
     (clobber (reg:CC CC_REGNUM))]
   "TARGET_SIMD"
-  "@
-  cmtst\t%d0, %d1, %d2
-  #"
-  "reload_completed
-   /* We need to prevent the split from
-      happening in the 'w' constraint cases.  */
-   && GP_REGNUM_P (REGNO (operands[0]))
-   && GP_REGNUM_P (REGNO (operands[1]))"
-  [(const_int 0)]
+  "#"
+  "reload_completed"
+  [(set (match_operand:DI 0 "register_operand")
+	(neg:DI
+	  (ne:DI
+	    (and:DI
+	      (match_operand:DI 1 "register_operand")
+	      (match_operand:DI 2 "register_operand"))
+	    (const_int 0))))]
   {
-    rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
-    enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
-    rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
-    rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
-    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-    DONE;
+    /* If we are in the general purpose register file,
+       we split to a sequence of comparison and store.  */
+    if (GP_REGNUM_P (REGNO (operands[0]))
+	&& GP_REGNUM_P (REGNO (operands[1])))
+      {
+	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
+	enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
+	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
+	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
+	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+	DONE;
+      }
+    /* Otherwise, we expand to a similar pattern which does not
+       clobber CC_REGNUM.  */
   }
+  [(set_attr "type" "neon_tst,multiple")]
+)
+
+(define_insn "*aarch64_cmtstdi"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+	(neg:DI
+	  (ne:DI
+	    (and:DI
+	      (match_operand:DI 1 "register_operand" "w")
+	      (match_operand:DI 2 "register_operand" "w"))
+	    (const_int 0))))]
+  "TARGET_SIMD"
+  "cmtst\t%d0, %d1, %d2"
   [(set_attr "type" "neon_tst")]
 )
 
@@ -4007,6 +4179,28 @@
   [(set_attr "type" "neon_load2_2reg<q>")]
 )
 
+(define_insn "aarch64_simd_ld2r<mode>"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+       (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
+                  UNSPEC_LD2_DUP))]
+  "TARGET_SIMD"
+  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load2_all_lanes<q>")]
+)
+
+(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
+  [(set (match_operand:OI 0 "register_operand" "=w")
+	(unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+		    (match_operand:OI 2 "register_operand" "0")
+		    (match_operand:SI 3 "immediate_operand" "i")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
+		   UNSPEC_LD2_LANE))]
+  "TARGET_SIMD"
+  "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
+  [(set_attr "type" "neon_load2_one_lane")]
+)
+
 (define_insn "vec_store_lanesoi<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
@@ -4017,6 +4211,17 @@
   [(set_attr "type" "neon_store2_2reg<q>")]
 )
 
+(define_insn "vec_store_lanesoi_lane<mode>"
+  [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+		    (match_operand:SI 2 "immediate_operand" "i")]
+                   UNSPEC_ST2_LANE))]
+  "TARGET_SIMD"
+  "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
 (define_insn "vec_load_lanesci<mode>"
   [(set (match_operand:CI 0 "register_operand" "=w")
 	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
@@ -4027,6 +4232,28 @@
   [(set_attr "type" "neon_load3_3reg<q>")]
 )
 
+(define_insn "aarch64_simd_ld3r<mode>"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+       (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
+                  UNSPEC_LD3_DUP))]
+  "TARGET_SIMD"
+  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load3_all_lanes<q>")]
+)
+
+(define_insn "aarch64_vec_load_lanesci_lane<mode>"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+	(unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+		    (match_operand:CI 2 "register_operand" "0")
+		    (match_operand:SI 3 "immediate_operand" "i")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD3_LANE))]
+  "TARGET_SIMD"
+  "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
+  [(set_attr "type" "neon_load3_one_lane")]
+)
+
 (define_insn "vec_store_lanesci<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
@@ -4037,6 +4264,17 @@
   [(set_attr "type" "neon_store3_3reg<q>")]
 )
 
+(define_insn "vec_store_lanesci_lane<mode>"
+  [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+		    (match_operand:SI 2 "immediate_operand" "i")]
+                   UNSPEC_ST3_LANE))]
+  "TARGET_SIMD"
+  "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store3_one_lane<q>")]
+)
+
 (define_insn "vec_load_lanesxi<mode>"
   [(set (match_operand:XI 0 "register_operand" "=w")
 	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
@@ -4047,6 +4285,28 @@
   [(set_attr "type" "neon_load4_4reg<q>")]
 )
 
+(define_insn "aarch64_simd_ld4r<mode>"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+       (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
+                  UNSPEC_LD4_DUP))]
+  "TARGET_SIMD"
+  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load4_all_lanes<q>")]
+)
+
+(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+	(unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+		    (match_operand:XI 2 "register_operand" "0")
+		    (match_operand:SI 3 "immediate_operand" "i")
+		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_LD4_LANE))]
+  "TARGET_SIMD"
+  "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
+  [(set_attr "type" "neon_load4_one_lane")]
+)
+
 (define_insn "vec_store_lanesxi<mode>"
   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
@@ -4057,6 +4317,17 @@
   [(set_attr "type" "neon_store4_4reg<q>")]
 )
 
+(define_insn "vec_store_lanesxi_lane<mode>"
+  [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+		    (match_operand:SI 2 "immediate_operand" "i")]
+                   UNSPEC_ST4_LANE))]
+  "TARGET_SIMD"
+  "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
+  [(set_attr "type" "neon_store4_one_lane<q>")]
+)
+
 ;; Reload patterns for AdvSIMD register list operands.
 
 (define_expand "mov<mode>"
@@ -4176,6 +4447,45 @@
   aarch64_simd_disambiguate_copy (operands, dest, src, 4);
 })
 
+(define_expand "aarch64_ld2r<mode>"
+  [(match_operand:OI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "w")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
+  DONE;
+})
+
+(define_expand "aarch64_ld3r<mode>"
+  [(match_operand:CI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "w")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
+  DONE;
+})
+
+(define_expand "aarch64_ld4r<mode>"
+  [(match_operand:XI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "w")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0],mem));
+  DONE;
+})
+
 (define_insn "aarch64_ld2<mode>_dreg"
   [(set (match_operand:OI 0 "register_operand" "=w")
 	(subreg:OI
@@ -4349,6 +4659,65 @@
   DONE;
 })
 
+(define_expand "aarch64_ld2_lane<mode>"
+  [(match_operand:OI 0 "register_operand" "=w")
+	(match_operand:DI 1 "register_operand" "w")
+	(match_operand:OI 2 "register_operand" "0")
+	(match_operand:SI 3 "immediate_operand" "i")
+	(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
+						      mem,
+						      operands[2],
+						      operands[3]));
+  DONE;
+})
+
+(define_expand "aarch64_ld3_lane<mode>"
+  [(match_operand:CI 0 "register_operand" "=w")
+	(match_operand:DI 1 "register_operand" "w")
+	(match_operand:CI 2 "register_operand" "0")
+	(match_operand:SI 3 "immediate_operand" "i")
+	(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
+						      mem,
+						      operands[2],
+						      operands[3]));
+  DONE;
+})
+
+(define_expand "aarch64_ld4_lane<mode>"
+  [(match_operand:XI 0 "register_operand" "=w")
+	(match_operand:DI 1 "register_operand" "w")
+	(match_operand:XI 2 "register_operand" "0")
+	(match_operand:SI 3 "immediate_operand" "i")
+	(unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+  aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+  emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
+						      mem,
+						      operands[2],
+						      operands[3]));
+  DONE;
+})
+
+
+
 ;; Expanders for builtins to extract vector registers from large
 ;; opaque integer modes.
 
@@ -4410,7 +4779,7 @@
    (match_operand:VB 1 "register_operand")
    (match_operand:VB 2 "register_operand")
    (match_operand:VB 3 "register_operand")]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "TARGET_SIMD"
 {
   aarch64_expand_vec_perm (operands[0], operands[1],
 			   operands[2], operands[3]);
@@ -4465,6 +4834,44 @@
   [(set_attr "type" "neon_permute<q>")]
 )
 
+;; Note immediate (third) operand is lane index not byte index.
+(define_insn "aarch64_ext<mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+        (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
+                      (match_operand:VALL 2 "register_operand" "w")
+                      (match_operand:SI 3 "immediate_operand" "i")]
+                     UNSPEC_EXT))]
+  "TARGET_SIMD"
+{
+  operands[3] = GEN_INT (INTVAL (operands[3])
+      * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
+  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
+}
+  [(set_attr "type" "neon_ext<q>")]
+)
+
+;; This exists solely to check the arguments to the corresponding __builtin.
+;; Used where we want an error for out-of-range indices which would otherwise
+;; be silently wrapped (e.g. the mask to a __builtin_shuffle).
+(define_expand "aarch64_im_lane_boundsi"
+  [(match_operand:SI 0 "immediate_operand" "i")
+   (match_operand:SI 1 "immediate_operand" "i")]
+  "TARGET_SIMD"
+{
+  aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1]));
+  DONE;
+}
+)
+
+(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
+                    REVERSE))]
+  "TARGET_SIMD"
+  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
 (define_insn "aarch64_st2<mode>_dreg"
   [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
 	(unspec:TI [(match_operand:OI 1 "register_operand" "w")
@@ -4551,6 +4958,57 @@
   DONE;
 })
 
+(define_expand "aarch64_st2_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:OI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_TWO_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
+						  operands[1],
+						  operands[2]));
+  DONE;
+})
+
+(define_expand "aarch64_st3_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:CI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_THREE_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
+						  operands[1],
+						  operands[2]));
+  DONE;
+})
+
+(define_expand "aarch64_st4_lane<VQ:mode>"
+ [(match_operand:DI 0 "register_operand" "r")
+  (match_operand:XI 1 "register_operand" "w")
+  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
+  (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  enum machine_mode mode = <V_FOUR_ELEM>mode;
+  rtx mem = gen_rtx_MEM (mode, operands[0]);
+  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
+
+  emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
+						  operands[1],
+						  operands[2]));
+  DONE;
+})
+
 (define_expand "aarch64_st1<VALL:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL 1 "register_operand")]
--- a/src/gcc/config/aarch64/predicates.md
+++ b/src/gcc/config/aarch64/predicates.md
@@ -26,6 +26,10 @@
 			      && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC"))))
 )
 
+(define_predicate "aarch64_call_insn_operand"
+  (ior (match_code "symbol_ref")
+       (match_operand 0 "register_operand")))
+
 (define_predicate "aarch64_simd_register"
   (and (match_code "reg")
        (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS")
@@ -119,6 +123,10 @@
        (match_test "INTVAL (op) != 0
 		    && (unsigned) exact_log2 (INTVAL (op)) < 64")))
 
+(define_predicate "aarch64_mem_pair_offset"
+  (and (match_code "const_int")
+       (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))")))
+
 (define_predicate "aarch64_mem_pair_operand"
   (and (match_code "mem")
        (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
@@ -194,6 +202,18 @@
 (define_special_predicate "aarch64_comparison_operator"
   (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt"))
 
+(define_special_predicate "aarch64_comparison_operation"
+  (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  rtx op0 = XEXP (op, 0);
+  if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+    return false;
+  return aarch64_get_condition_code (op) >= 0;
+})
+
+
 ;; True if the operand is memory reference suitable for a load/store exclusive.
 (define_predicate "aarch64_sync_memory_operand"
   (and (match_operand 0 "memory_operand")
@@ -203,62 +223,15 @@
 (define_special_predicate "vect_par_cnst_hi_half"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  int nunits = GET_MODE_NUNITS (mode);
-  int i;
-
-  if (count < 1
-      || count != nunits / 2)
-    return false;
- 
-  if (!VECTOR_MODE_P (mode))
-    return false;
-
-  for (i = 0; i < count; i++)
-   {
-     rtx elt = XVECEXP (op, 0, i);
-     int val;
-
-     if (GET_CODE (elt) != CONST_INT)
-       return false;
-
-     val = INTVAL (elt);
-     if (val != (nunits / 2) + i)
-       return false;
-   }
-  return true;
+  return aarch64_simd_check_vect_par_cnst_half (op, mode, true);
 })
 
 (define_special_predicate "vect_par_cnst_lo_half"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  int nunits = GET_MODE_NUNITS (mode);
-  int i;
-
-  if (count < 1
-      || count != nunits / 2)
-    return false;
-
-  if (!VECTOR_MODE_P (mode))
-    return false;
-
-  for (i = 0; i < count; i++)
-   {
-     rtx elt = XVECEXP (op, 0, i);
-     int val;
-
-     if (GET_CODE (elt) != CONST_INT)
-       return false;
-
-     val = INTVAL (elt);
-     if (val != i)
-       return false;
-   }
-  return true;
+  return aarch64_simd_check_vect_par_cnst_half (op, mode, false);
 })
 
-
 (define_special_predicate "aarch64_simd_lshift_imm"
   (match_code "const_vector")
 {
@@ -300,3 +273,62 @@
 {
   return aarch64_simd_imm_zero_p (op, mode);
 })
+
+(define_special_predicate "aarch64_simd_imm_minus_one"
+  (match_code "const_vector")
+{
+  return aarch64_const_vec_all_same_int_p (op, -1);
+})
+
+;; Predicates used by the various SIMD shift operations.  These
+;; fall in to 3 categories.
+;;   Shifts with a range 0-(bit_size - 1) (aarch64_simd_shift_imm)
+;;   Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
+;;   Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)
+(define_predicate "aarch64_simd_shift_imm_qi"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+
+(define_predicate "aarch64_simd_shift_imm_hi"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
+(define_predicate "aarch64_simd_shift_imm_si"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
+
+(define_predicate "aarch64_simd_shift_imm_di"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
+
+(define_predicate "aarch64_simd_shift_imm_offset_qi"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 1, 8)")))
+
+(define_predicate "aarch64_simd_shift_imm_offset_hi"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 1, 16)")))
+
+(define_predicate "aarch64_simd_shift_imm_offset_si"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 1, 32)")))
+
+(define_predicate "aarch64_simd_shift_imm_offset_di"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 1, 64)")))
+
+(define_predicate "aarch64_simd_shift_imm_bitsize_qi"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
+
+(define_predicate "aarch64_simd_shift_imm_bitsize_hi"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 16)")))
+
+(define_predicate "aarch64_simd_shift_imm_bitsize_si"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 32)")))
+
+(define_predicate "aarch64_simd_shift_imm_bitsize_di"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 64)")))
--- a/src/gcc/config/aarch64/arm_neon.h
+++ b/src/gcc/config/aarch64/arm_neon.h
@@ -2113,29 +2113,26 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
-						  (int8x8_t) __b);
+  return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
-						   (int16x4_t) __b);
+  return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
-						   (int32x2_t) __b);
+  return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
-						 (int64x1_t) __b);
+  return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a,
+						     (uint64_t) __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -2165,29 +2162,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
-						    (int8x16_t) __b);
+  return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
-						   (int16x8_t) __b);
+  return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
-						   (int32x4_t) __b);
+  return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
-						   (int64x2_t) __b);
+  return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -2217,29 +2210,26 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
-						  (int8x8_t) __b);
+  return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
-						   (int16x4_t) __b);
+  return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
-						   (int32x2_t) __b);
+  return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
-						 (int64x1_t) __b);
+  return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a,
+						     (uint64_t) __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -2269,29 +2259,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
-						    (int8x16_t) __b);
+  return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
-						   (int16x8_t) __b);
+  return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
-						   (int32x4_t) __b);
+  return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
-						   (int64x2_t) __b);
+  return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -2312,6 +2298,12 @@
   return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
 }
 
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqneg_s64 (int64x1_t __a)
+{
+  return __builtin_aarch64_sqnegdi (__a);
+}
+
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vqnegq_s8 (int8x16_t __a)
 {
@@ -2348,6 +2340,12 @@
   return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
 }
 
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqabs_s64 (int64x1_t __a)
+{
+  return __builtin_aarch64_sqabsdi (__a);
+}
+
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vqabsq_s8 (int8x16_t __a)
 {
@@ -2637,1352 +2635,1587 @@
 /* vreinterpret  */
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv8qidf_ps (__a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_s8 (int8x8_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_s16 (int16x4_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_s32 (int32x2_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_s64 (int64x1_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_f32 (float32x2_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_u8 (uint8x8_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_u16 (uint16x4_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_u32 (uint32x2_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_u64 (uint64x1_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_p16 (poly16x4_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
+  return (poly8x8_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_f64 (float64x2_t __a)
+{
+  return (poly8x16_t) __a;
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_s8 (int8x16_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_s16 (int16x8_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_s32 (int32x4_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_s64 (int64x2_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_f32 (float32x4_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_u8 (uint8x16_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-							       __a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_u16 (uint16x8_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-							      __a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_u32 (uint32x4_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
-							      __a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_u64 (uint64x2_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
-							      __a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_p8_p16 (poly16x8_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-							      __a);
+  return (poly8x16_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv4hidf_ps (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_s8 (int8x8_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_s16 (int16x4_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_s32 (int32x2_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_s64 (int64x1_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_f32 (float32x2_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_u8 (uint8x8_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_u16 (uint16x4_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_u32 (uint32x2_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_u64 (uint64x1_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
 vreinterpret_p16_p8 (poly8x8_t __a)
 {
-  return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
+  return (poly16x4_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_f64 (float64x2_t __a)
+{
+  return (poly16x8_t) __a;
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_s8 (int8x16_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_s16 (int16x8_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_s32 (int32x4_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_s64 (int64x2_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_f32 (float32x4_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_u8 (uint8x16_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-							      __a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_u16 (uint16x8_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_u32 (uint32x4_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_u64 (uint64x2_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_p16_p8 (poly8x16_t __a)
 {
-  return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-							      __a);
+  return (poly16x8_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv2sfdf (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_s8 (int8x8_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_s16 (int16x4_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_s32 (int32x2_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_s64 (int64x1_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_u8 (uint8x8_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_u16 (uint16x4_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
-							      __a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_u32 (uint32x2_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
-							      __a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_u64 (uint64x1_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_p8 (poly8x8_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vreinterpret_f32_p16 (poly16x4_t __a)
 {
-  return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
-							      __a);
+  return (float32x2_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_f64 (float64x2_t __a)
+{
+  return (float32x4_t) __a;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_s8 (int8x16_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_s16 (int16x8_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_s32 (int32x4_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_s64 (int64x2_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_u8 (uint8x16_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
-							       __a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_u16 (uint16x8_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
-							      __a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_u32 (uint32x4_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
-							      __a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_u64 (uint64x2_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
-							      __a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_p8 (poly8x16_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
-							       __a);
+  return (float32x4_t) __a;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_f32_p16 (poly16x8_t __a)
 {
-  return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
-							      __a);
+  return (float32x4_t) __a;
 }
 
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_f32 (float32x2_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv2sf (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_p8 (poly8x8_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv8qi_sp (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_p16 (poly16x4_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv4hi_sp (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_s8 (int8x8_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv8qi (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_s16 (int16x4_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv4hi (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_s32 (int32x2_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv2si (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_s64 (int64x1_t __a)
+{
+  return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0));
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_u8 (uint8x8_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv8qi_su (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_u16 (uint16x4_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv4hi_su (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_u32 (uint32x2_t __a)
+{
+  return __builtin_aarch64_reinterpretdfv2si_su (__a);
+}
+
+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
+vreinterpret_f64_u64 (uint64x1_t __a)
+{
+  return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0));
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_f32 (float32x4_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_p8 (poly8x16_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_p16 (poly16x8_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_s8 (int8x16_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_s16 (int16x8_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_s32 (int32x4_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_s64 (int64x2_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_u8 (uint8x16_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_u16 (uint16x8_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_u32 (uint32x4_t __a)
+{
+  return (float64x2_t) __a;
+}
+
+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
+vreinterpretq_f64_u64 (uint64x2_t __a)
+{
+  return (float64x2_t) __a;
+}
+
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretdidf (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_s8 (int8x8_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_s16 (int16x4_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_s32 (int32x2_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_f32 (float32x2_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_u8 (uint8x8_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_u16 (uint16x4_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_u32 (uint32x2_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_u64 (uint64x1_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_p8 (poly8x8_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vreinterpret_s64_p16 (poly16x4_t __a)
 {
-  return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
+  return (int64x1_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_f64 (float64x2_t __a)
+{
+  return (int64x2_t) __a;
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_s8 (int8x16_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_s16 (int16x8_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_s32 (int32x4_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_f32 (float32x4_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_u8 (uint8x16_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_u16 (uint16x8_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_u32 (uint32x4_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_u64 (uint64x2_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_p8 (poly8x16_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_s64_p16 (poly16x8_t __a)
 {
-  return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
+  return (int64x2_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretdidf_us (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_s8 (int8x8_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_s16 (int16x4_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_s32 (int32x2_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_s64 (int64x1_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_f32 (float32x2_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_u8 (uint8x8_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_u16 (uint16x4_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_u32 (uint32x2_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_p8 (poly8x8_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vreinterpret_u64_p16 (poly16x4_t __a)
 {
-  return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
+  return (uint64x1_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_f64 (float64x2_t __a)
+{
+  return (uint64x2_t) __a;
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_s8 (int8x16_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_s16 (int16x8_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_s32 (int32x4_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_s64 (int64x2_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_f32 (float32x4_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_u8 (uint8x16_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
-							      __a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_u16 (uint16x8_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_u32 (uint32x4_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_p8 (poly8x16_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
-							      __a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vreinterpretq_u64_p16 (poly16x8_t __a)
 {
-  return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
+  return (uint64x2_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv8qidf (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_s16 (int16x4_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_s32 (int32x2_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_s64 (int64x1_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_f32 (float32x2_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_u8 (uint8x8_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_u16 (uint16x4_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_u32 (uint32x2_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_u64 (uint64x1_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_p8 (poly8x8_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vreinterpret_s8_p16 (poly16x4_t __a)
 {
-  return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
+  return (int8x8_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_f64 (float64x2_t __a)
+{
+  return (int8x16_t) __a;
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_s16 (int16x8_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_s32 (int32x4_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_s64 (int64x2_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_f32 (float32x4_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_u8 (uint8x16_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-							      __a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_u16 (uint16x8_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_u32 (uint32x4_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_u64 (uint64x2_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_p8 (poly8x16_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-							      __a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_s8_p16 (poly16x8_t __a)
 {
-  return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
+  return (int8x16_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv4hidf (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_s8 (int8x8_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_s32 (int32x2_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_s64 (int64x1_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_f32 (float32x2_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_u8 (uint8x8_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_u16 (uint16x4_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_u32 (uint32x2_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_u64 (uint64x1_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_p8 (poly8x8_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vreinterpret_s16_p16 (poly16x4_t __a)
 {
-  return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
+  return (int16x4_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_f64 (float64x2_t __a)
+{
+  return (int16x8_t) __a;
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_s8 (int8x16_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_s32 (int32x4_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_s64 (int64x2_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_f32 (float32x4_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_u8 (uint8x16_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_u16 (uint16x8_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_u32 (uint32x4_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_u64 (uint64x2_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_p8 (poly8x16_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_s16_p16 (poly16x8_t __a)
 {
-  return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
+  return (int16x8_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv2sidf (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_s8 (int8x8_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_s16 (int16x4_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_s64 (int64x1_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_f32 (float32x2_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_u8 (uint8x8_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_u16 (uint16x4_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_u32 (uint32x2_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_u64 (uint64x1_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_p8 (poly8x8_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vreinterpret_s32_p16 (poly16x4_t __a)
 {
-  return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
+  return (int32x2_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_f64 (float64x2_t __a)
+{
+  return (int32x4_t) __a;
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_s8 (int8x16_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_s16 (int16x8_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_s64 (int64x2_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_f32 (float32x4_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_u8 (uint8x16_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_u16 (uint16x8_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_u32 (uint32x4_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_u64 (uint64x2_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_p8 (poly8x16_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_s32_p16 (poly16x8_t __a)
 {
-  return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
+  return (int32x4_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv8qidf_us (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_s8 (int8x8_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_s16 (int16x4_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_s32 (int32x2_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_s64 (int64x1_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_f32 (float32x2_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_u16 (uint16x4_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_u32 (uint32x2_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_u64 (uint64x1_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_p8 (poly8x8_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vreinterpret_u8_p16 (poly16x4_t __a)
 {
-  return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
+  return (uint8x8_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_f64 (float64x2_t __a)
+{
+  return (uint8x16_t) __a;
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_s8 (int8x16_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_s16 (int16x8_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_s32 (int32x4_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_s64 (int64x2_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_f32 (float32x4_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_u16 (uint16x8_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-							      __a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_u32 (uint32x4_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
-							      __a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_u64 (uint64x2_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
-							      __a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_p8 (poly8x16_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-							       __a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vreinterpretq_u8_p16 (poly16x8_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-							      __a);
+  return (uint8x16_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv4hidf_us (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_s8 (int8x8_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_s16 (int16x4_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_s32 (int32x2_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_s64 (int64x1_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_f32 (float32x2_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_u8 (uint8x8_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_u32 (uint32x2_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_u64 (uint64x1_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_p8 (poly8x8_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vreinterpret_u16_p16 (poly16x4_t __a)
 {
-  return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
+  return (uint16x4_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_f64 (float64x2_t __a)
+{
+  return (uint16x8_t) __a;
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_s8 (int8x16_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_s16 (int16x8_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_s32 (int32x4_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_s64 (int64x2_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_f32 (float32x4_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_u8 (uint8x16_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-							      __a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_u32 (uint32x4_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_u64 (uint64x2_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_p8 (poly8x16_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-							      __a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vreinterpretq_u16_p16 (poly16x8_t __a)
 {
-  return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
+  return (uint16x8_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_reinterpretv2sidf_us (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_s8 (int8x8_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_s16 (int16x4_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_s32 (int32x2_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_s64 (int64x1_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_f32 (float32x2_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_u8 (uint8x8_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_u16 (uint16x4_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_u64 (uint64x1_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_p8 (poly8x8_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vreinterpret_u32_p16 (poly16x4_t __a)
 {
-  return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
+  return (uint32x2_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_f64 (float64x2_t __a)
+{
+  return (uint32x4_t) __a;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_s8 (int8x16_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_s16 (int16x8_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_s32 (int32x4_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_s64 (int64x2_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_f32 (float32x4_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_u8 (uint8x16_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
-							      __a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_u16 (uint16x8_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_u64 (uint64x2_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_p8 (poly8x16_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
-							      __a);
+  return (uint32x4_t) __a;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vreinterpretq_u32_p16 (poly16x8_t __a)
 {
-  return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
+  return (uint32x4_t) __a;
 }
 
 #define __GET_LOW(__TYPE) \
@@ -4064,6 +4297,85 @@
 
 #undef __GET_LOW
 
+#define __GET_HIGH(__TYPE)					\
+  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);		\
+  uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1));	\
+  return vreinterpret_##__TYPE##_u64 (hi);
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vget_high_f32 (float32x4_t __a)
+{
+  __GET_HIGH (f32);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vget_high_f64 (float64x2_t __a)
+{
+  __GET_HIGH (f64);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vget_high_p8 (poly8x16_t __a)
+{
+  __GET_HIGH (p8);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vget_high_p16 (poly16x8_t __a)
+{
+  __GET_HIGH (p16);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vget_high_s8 (int8x16_t __a)
+{
+  __GET_HIGH (s8);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vget_high_s16 (int16x8_t __a)
+{
+  __GET_HIGH (s16);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vget_high_s32 (int32x4_t __a)
+{
+  __GET_HIGH (s32);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vget_high_s64 (int64x2_t __a)
+{
+  __GET_HIGH (s64);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vget_high_u8 (uint8x16_t __a)
+{
+  __GET_HIGH (u8);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vget_high_u16 (uint16x8_t __a)
+{
+  __GET_HIGH (u16);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vget_high_u32 (uint32x4_t __a)
+{
+  __GET_HIGH (u32);
+}
+
+#undef __GET_HIGH
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vget_high_u64 (uint64x2_t __a)
+{
+  return vcreate_u64 (vgetq_lane_u64 (__a, 1));
+}
+
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vcombine_s8 (int8x8_t __a, int8x8_t __b)
 {
@@ -5408,318 +5720,6 @@
   return result;
 }
 
-#define vext_f32(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x2_t b_ = (b);                                            \
-       float32x2_t a_ = (a);                                            \
-       float32x2_t result;                                              \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_f64(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x1_t b_ = (b);                                            \
-       float64x1_t a_ = (a);                                            \
-       float64x1_t result;                                              \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_p8(a, b, c)                                                \
-  __extension__                                                         \
-    ({                                                                  \
-       poly8x8_t b_ = (b);                                              \
-       poly8x8_t a_ = (a);                                              \
-       poly8x8_t result;                                                \
-       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_p16(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       poly16x4_t b_ = (b);                                             \
-       poly16x4_t a_ = (a);                                             \
-       poly16x4_t result;                                               \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_s8(a, b, c)                                                \
-  __extension__                                                         \
-    ({                                                                  \
-       int8x8_t b_ = (b);                                               \
-       int8x8_t a_ = (a);                                               \
-       int8x8_t result;                                                 \
-       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_s16(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       int16x4_t b_ = (b);                                              \
-       int16x4_t a_ = (a);                                              \
-       int16x4_t result;                                                \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_s32(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x2_t b_ = (b);                                              \
-       int32x2_t a_ = (a);                                              \
-       int32x2_t result;                                                \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_s64(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       int64x1_t b_ = (b);                                              \
-       int64x1_t a_ = (a);                                              \
-       int64x1_t result;                                                \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_u8(a, b, c)                                                \
-  __extension__                                                         \
-    ({                                                                  \
-       uint8x8_t b_ = (b);                                              \
-       uint8x8_t a_ = (a);                                              \
-       uint8x8_t result;                                                \
-       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_u16(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       uint16x4_t b_ = (b);                                             \
-       uint16x4_t a_ = (a);                                             \
-       uint16x4_t result;                                               \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_u32(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x2_t b_ = (b);                                             \
-       uint32x2_t a_ = (a);                                             \
-       uint32x2_t result;                                               \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vext_u64(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       uint64x1_t b_ = (b);                                             \
-       uint64x1_t a_ = (a);                                             \
-       uint64x1_t result;                                               \
-       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_f32(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t b_ = (b);                                            \
-       float32x4_t a_ = (a);                                            \
-       float32x4_t result;                                              \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_f64(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x2_t b_ = (b);                                            \
-       float64x2_t a_ = (a);                                            \
-       float64x2_t result;                                              \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_p8(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       poly8x16_t b_ = (b);                                             \
-       poly8x16_t a_ = (a);                                             \
-       poly8x16_t result;                                               \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_p16(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       poly16x8_t b_ = (b);                                             \
-       poly16x8_t a_ = (a);                                             \
-       poly16x8_t result;                                               \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_s8(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       int8x16_t b_ = (b);                                              \
-       int8x16_t a_ = (a);                                              \
-       int8x16_t result;                                                \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_s16(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       int16x8_t b_ = (b);                                              \
-       int16x8_t a_ = (a);                                              \
-       int16x8_t result;                                                \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_s32(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       int32x4_t b_ = (b);                                              \
-       int32x4_t a_ = (a);                                              \
-       int32x4_t result;                                                \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_s64(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       int64x2_t b_ = (b);                                              \
-       int64x2_t a_ = (a);                                              \
-       int64x2_t result;                                                \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_u8(a, b, c)                                               \
-  __extension__                                                         \
-    ({                                                                  \
-       uint8x16_t b_ = (b);                                             \
-       uint8x16_t a_ = (a);                                             \
-       uint8x16_t result;                                               \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_u16(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       uint16x8_t b_ = (b);                                             \
-       uint16x8_t a_ = (a);                                             \
-       uint16x8_t result;                                               \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_u32(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       uint32x4_t b_ = (b);                                             \
-       uint32x4_t a_ = (a);                                             \
-       uint32x4_t result;                                               \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vextq_u64(a, b, c)                                              \
-  __extension__                                                         \
-    ({                                                                  \
-       uint64x2_t b_ = (b);                                             \
-       uint64x2_t a_ = (a);                                             \
-       uint64x2_t result;                                               \
-       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
 {
@@ -5819,139 +5819,7 @@
   return result;
 }
 
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vget_high_f32 (float32x4_t a)
-{
-  float32x2_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-vget_high_f64 (float64x2_t a)
-{
-  float64x1_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vget_high_p8 (poly8x16_t a)
-{
-  poly8x8_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vget_high_p16 (poly16x8_t a)
-{
-  poly16x4_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vget_high_s8 (int8x16_t a)
-{
-  int8x8_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vget_high_s16 (int16x8_t a)
-{
-  int16x4_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vget_high_s32 (int32x4_t a)
-{
-  int32x2_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vget_high_s64 (int64x2_t a)
-{
-  int64x1_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vget_high_u8 (uint8x16_t a)
-{
-  uint8x8_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vget_high_u16 (uint16x8_t a)
-{
-  uint16x4_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vget_high_u32 (uint32x4_t a)
-{
-  uint32x2_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vget_high_u64 (uint64x2_t a)
-{
-  uint64x1_t result;
-  __asm__ ("ins %0.d[0], %1.d[1]"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vhsub_s8 (int8x8_t a, int8x8_t b)
 {
   int8x8_t result;
@@ -6784,7 +6652,7 @@
 #define vmlal_high_lane_s16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int16x8_t c_ = (c);                                              \
+       int16x4_t c_ = (c);                                              \
        int16x8_t b_ = (b);                                              \
        int32x4_t a_ = (a);                                              \
        int32x4_t result;                                                \
@@ -6798,7 +6666,7 @@
 #define vmlal_high_lane_s32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int32x4_t c_ = (c);                                              \
+       int32x2_t c_ = (c);                                              \
        int32x4_t b_ = (b);                                              \
        int64x2_t a_ = (a);                                              \
        int64x2_t result;                                                \
@@ -6812,7 +6680,7 @@
 #define vmlal_high_lane_u16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint16x8_t c_ = (c);                                             \
+       uint16x4_t c_ = (c);                                             \
        uint16x8_t b_ = (b);                                             \
        uint32x4_t a_ = (a);                                             \
        uint32x4_t result;                                               \
@@ -6826,7 +6694,7 @@
 #define vmlal_high_lane_u32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint32x4_t c_ = (c);                                             \
+       uint32x2_t c_ = (c);                                             \
        uint32x4_t b_ = (b);                                             \
        uint64x2_t a_ = (a);                                             \
        uint64x2_t result;                                               \
@@ -7237,18 +7105,6 @@
   return result;
 }
 
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
-{
-  float64x2_t result;
-  float64x2_t t1;
-  __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
-           : "=w"(result), "=w"(t1)
-           : "0"(a), "w"(b), "w"(c)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
 {
@@ -7484,7 +7340,7 @@
 #define vmlsl_high_lane_s16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int16x8_t c_ = (c);                                              \
+       int16x4_t c_ = (c);                                              \
        int16x8_t b_ = (b);                                              \
        int32x4_t a_ = (a);                                              \
        int32x4_t result;                                                \
@@ -7498,7 +7354,7 @@
 #define vmlsl_high_lane_s32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int32x4_t c_ = (c);                                              \
+       int32x2_t c_ = (c);                                              \
        int32x4_t b_ = (b);                                              \
        int64x2_t a_ = (a);                                              \
        int64x2_t result;                                                \
@@ -7512,7 +7368,7 @@
 #define vmlsl_high_lane_u16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint16x8_t c_ = (c);                                             \
+       uint16x4_t c_ = (c);                                             \
        uint16x8_t b_ = (b);                                             \
        uint32x4_t a_ = (a);                                             \
        uint32x4_t result;                                               \
@@ -7526,7 +7382,7 @@
 #define vmlsl_high_lane_u32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint32x4_t c_ = (c);                                             \
+       uint32x2_t c_ = (c);                                             \
        uint32x4_t b_ = (b);                                             \
        uint64x2_t a_ = (a);                                             \
        uint64x2_t result;                                               \
@@ -7937,18 +7793,6 @@
   return result;
 }
 
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
-{
-  float64x2_t result;
-  float64x2_t t1;
-  __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
-           : "=w"(result), "=w"(t1)
-           : "0"(a), "w"(b), "x"(c)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
 {
@@ -9312,57 +9156,7 @@
   return result;
 }
 
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vpadd_s8 (int8x8_t __a, int8x8_t __b)
-{
-  return __builtin_aarch64_addpv8qi (__a, __b);
-}
-
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vpadd_s16 (int16x4_t __a, int16x4_t __b)
-{
-  return __builtin_aarch64_addpv4hi (__a, __b);
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vpadd_s32 (int32x2_t __a, int32x2_t __b)
-{
-  return __builtin_aarch64_addpv2si (__a, __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
-{
-  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
-{
-  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
-}
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vpaddd_f64 (float64x2_t a)
-{
-  float64_t result;
-  __asm__ ("faddp %d0,%1.2d"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vpaddl_s8 (int8x8_t a)
 {
   int16x4_t result;
@@ -10556,50 +10350,6 @@
        result;                                                          \
      })
 
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrbit_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rbit %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrbit_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rbit %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrbitq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rbit %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrbitq_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rbit %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vrecpe_u32 (uint32x2_t a)
 {
@@ -10622,402 +10372,6 @@
   return result;
 }
 
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vrev16_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("rev16 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrev16_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rev16 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrev16_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rev16 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vrev16q_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("rev16 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrev16q_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rev16 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrev16q_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rev16 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vrev32_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("rev32 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vrev32_p16 (poly16x4_t a)
-{
-  poly16x4_t result;
-  __asm__ ("rev32 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrev32_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rev32 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vrev32_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("rev32 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrev32_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rev32 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vrev32_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("rev32 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vrev32q_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("rev32 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vrev32q_p16 (poly16x8_t a)
-{
-  poly16x8_t result;
-  __asm__ ("rev32 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrev32q_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rev32 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vrev32q_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("rev32 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrev32q_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rev32 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vrev32q_u16 (uint16x8_t a)
-{
-  uint16x8_t result;
-  __asm__ ("rev32 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vrev64_f32 (float32x2_t a)
-{
-  float32x2_t result;
-  __asm__ ("rev64 %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vrev64_p8 (poly8x8_t a)
-{
-  poly8x8_t result;
-  __asm__ ("rev64 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vrev64_p16 (poly16x4_t a)
-{
-  poly16x4_t result;
-  __asm__ ("rev64 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrev64_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("rev64 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vrev64_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("rev64 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vrev64_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("rev64 %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrev64_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("rev64 %0.8b,%1.8b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vrev64_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("rev64 %0.4h,%1.4h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vrev64_u32 (uint32x2_t a)
-{
-  uint32x2_t result;
-  __asm__ ("rev64 %0.2s,%1.2s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vrev64q_f32 (float32x4_t a)
-{
-  float32x4_t result;
-  __asm__ ("rev64 %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vrev64q_p8 (poly8x16_t a)
-{
-  poly8x16_t result;
-  __asm__ ("rev64 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vrev64q_p16 (poly16x8_t a)
-{
-  poly16x8_t result;
-  __asm__ ("rev64 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrev64q_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("rev64 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vrev64q_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("rev64 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vrev64q_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("rev64 %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrev64q_u8 (uint8x16_t a)
-{
-  uint8x16_t result;
-  __asm__ ("rev64 %0.16b,%1.16b"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vrev64q_u16 (uint16x8_t a)
-{
-  uint16x8_t result;
-  __asm__ ("rev64 %0.8h,%1.8h"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vrev64q_u32 (uint32x4_t a)
-{
-  uint32x4_t result;
-  __asm__ ("rev64 %0.4s,%1.4s"
-           : "=w"(result)
-           : "w"(a)
-           : /* No clobbers */);
-  return result;
-}
-
 #define vrshrn_high_n_s16(a, b, c)                                      \
   __extension__                                                         \
     ({                                                                  \
@@ -11323,17 +10677,6 @@
   return result;
 }
 
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vrsrtsq_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
 {
@@ -12441,469 +11784,7 @@
   return result;
 }
 
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vtrn1_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vtrn1_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vtrn1_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vtrn1_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t result;
-  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vtrn1_s16 (int16x4_t a, int16x4_t b)
-{
-  int16x4_t result;
-  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vtrn1_s32 (int32x2_t a, int32x2_t b)
-{
-  int32x2_t result;
-  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vtrn1_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint8x8_t result;
-  __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vtrn1_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint16x4_t result;
-  __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vtrn1_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint32x2_t result;
-  __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vtrn1q_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vtrn1q_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly8x16_t result;
-  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vtrn1q_s8 (int8x16_t a, int8x16_t b)
-{
-  int8x16_t result;
-  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vtrn1q_s16 (int16x8_t a, int16x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vtrn1q_s32 (int32x4_t a, int32x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vtrn1q_s64 (int64x2_t a, int64x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint8x16_t result;
-  __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vtrn2_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vtrn2_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vtrn2_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vtrn2_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t result;
-  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vtrn2_s16 (int16x4_t a, int16x4_t b)
-{
-  int16x4_t result;
-  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vtrn2_s32 (int32x2_t a, int32x2_t b)
-{
-  int32x2_t result;
-  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vtrn2_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint8x8_t result;
-  __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vtrn2_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint16x4_t result;
-  __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vtrn2_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint32x2_t result;
-  __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vtrn2q_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vtrn2q_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly8x16_t result;
-  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vtrn2q_s8 (int8x16_t a, int8x16_t b)
-{
-  int8x16_t result;
-  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vtrn2q_s16 (int16x8_t a, int16x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vtrn2q_s32 (int32x4_t a, int32x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vtrn2q_s64 (int64x2_t a, int64x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint8x16_t result;
-  __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vtst_p8 (poly8x8_t a, poly8x8_t b)
 {
   uint8x8_t result;
@@ -12946,930 +11827,7 @@
            : /* No clobbers */);
   return result;
 }
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vuzp1_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
 
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vuzp1_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vuzp1_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vuzp1_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t result;
-  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vuzp1_s16 (int16x4_t a, int16x4_t b)
-{
-  int16x4_t result;
-  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vuzp1_s32 (int32x2_t a, int32x2_t b)
-{
-  int32x2_t result;
-  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vuzp1_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint8x8_t result;
-  __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vuzp1_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint16x4_t result;
-  __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vuzp1_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint32x2_t result;
-  __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vuzp1q_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vuzp1q_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly8x16_t result;
-  __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vuzp1q_s8 (int8x16_t a, int8x16_t b)
-{
-  int8x16_t result;
-  __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vuzp1q_s16 (int16x8_t a, int16x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vuzp1q_s32 (int32x4_t a, int32x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vuzp1q_s64 (int64x2_t a, int64x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint8x16_t result;
-  __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vuzp2_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vuzp2_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vuzp2_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vuzp2_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t result;
-  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vuzp2_s16 (int16x4_t a, int16x4_t b)
-{
-  int16x4_t result;
-  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vuzp2_s32 (int32x2_t a, int32x2_t b)
-{
-  int32x2_t result;
-  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vuzp2_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint8x8_t result;
-  __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vuzp2_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint16x4_t result;
-  __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vuzp2_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint32x2_t result;
-  __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vuzp2q_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vuzp2q_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly8x16_t result;
-  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vuzp2q_s8 (int8x16_t a, int8x16_t b)
-{
-  int8x16_t result;
-  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vuzp2q_s16 (int16x8_t a, int16x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vuzp2q_s32 (int32x4_t a, int32x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vuzp2q_s64 (int64x2_t a, int64x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint8x16_t result;
-  __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vzip1_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vzip1_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vzip1_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vzip1_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t result;
-  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vzip1_s16 (int16x4_t a, int16x4_t b)
-{
-  int16x4_t result;
-  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vzip1_s32 (int32x2_t a, int32x2_t b)
-{
-  int32x2_t result;
-  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vzip1_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint8x8_t result;
-  __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vzip1_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint16x4_t result;
-  __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vzip1_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint32x2_t result;
-  __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vzip1q_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vzip1q_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vzip1q_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly8x16_t result;
-  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vzip1q_p16 (poly16x8_t a, poly16x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vzip1q_s8 (int8x16_t a, int8x16_t b)
-{
-  int8x16_t result;
-  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vzip1q_s16 (int16x8_t a, int16x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vzip1q_s32 (int32x4_t a, int32x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vzip1q_s64 (int64x2_t a, int64x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vzip1q_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint8x16_t result;
-  __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vzip1q_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vzip1q_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vzip1q_u64 (uint64x2_t a, uint64x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vzip2_f32 (float32x2_t a, float32x2_t b)
-{
-  float32x2_t result;
-  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vzip2_p8 (poly8x8_t a, poly8x8_t b)
-{
-  poly8x8_t result;
-  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vzip2_p16 (poly16x4_t a, poly16x4_t b)
-{
-  poly16x4_t result;
-  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vzip2_s8 (int8x8_t a, int8x8_t b)
-{
-  int8x8_t result;
-  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vzip2_s16 (int16x4_t a, int16x4_t b)
-{
-  int16x4_t result;
-  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vzip2_s32 (int32x2_t a, int32x2_t b)
-{
-  int32x2_t result;
-  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vzip2_u8 (uint8x8_t a, uint8x8_t b)
-{
-  uint8x8_t result;
-  __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vzip2_u16 (uint16x4_t a, uint16x4_t b)
-{
-  uint16x4_t result;
-  __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vzip2_u32 (uint32x2_t a, uint32x2_t b)
-{
-  uint32x2_t result;
-  __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vzip2q_f32 (float32x4_t a, float32x4_t b)
-{
-  float32x4_t result;
-  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vzip2q_f64 (float64x2_t a, float64x2_t b)
-{
-  float64x2_t result;
-  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-vzip2q_p8 (poly8x16_t a, poly8x16_t b)
-{
-  poly8x16_t result;
-  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-vzip2q_p16 (poly16x8_t a, poly16x8_t b)
-{
-  poly16x8_t result;
-  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vzip2q_s8 (int8x16_t a, int8x16_t b)
-{
-  int8x16_t result;
-  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vzip2q_s16 (int16x8_t a, int16x8_t b)
-{
-  int16x8_t result;
-  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vzip2q_s32 (int32x4_t a, int32x4_t b)
-{
-  int32x4_t result;
-  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vzip2q_s64 (int64x2_t a, int64x2_t b)
-{
-  int64x2_t result;
-  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vzip2q_u8 (uint8x16_t a, uint8x16_t b)
-{
-  uint8x16_t result;
-  __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vzip2q_u16 (uint16x8_t a, uint16x8_t b)
-{
-  uint16x8_t result;
-  __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vzip2q_u32 (uint32x4_t a, uint32x4_t b)
-{
-  uint32x4_t result;
-  __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vzip2q_u64 (uint64x2_t a, uint64x2_t b)
-{
-  uint64x2_t result;
-  __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
-}
-
 /* End of temporary inline asm implementations.  */
 
 /* Start of temporary inline asm for vldn, vstn and friends.  */
@@ -13953,378 +11911,226 @@
 __STRUCTN (float, 64, 4)
 #undef __STRUCTN
 
-#define __LD2R_FUNC(rettype, structtype, ptrtype,			\
-		    regsuffix, funcsuffix, Q)				\
-  __extension__ static __inline rettype					\
-  __attribute__ ((__always_inline__)) 					\
-  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
-  {									\
-    rettype result;							\
-    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
-	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
-	     : "=Q"(result)						\
-	     : "Q"(*(const structtype *)ptr)				\
-	     : "memory", "v16", "v17");					\
-    return result;							\
-  }
 
-__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
-__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
-__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
-__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
-__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
-__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
-__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
-__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
-__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
-__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
-__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
-__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
-__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
-__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
-__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
-__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
-__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
-__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
-__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
-__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
-__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
-__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
-__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
-__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
+#define __ST2_LANE_FUNC(intype, largetype, ptrtype,			     \
+			mode, ptr_mode, funcsuffix, signedtype)		     \
+__extension__ static __inline void					     \
+__attribute__ ((__always_inline__))					     \
+vst2_lane_ ## funcsuffix (ptrtype *__ptr,				     \
+			  intype __b, const int __c)			     \
+{									     \
+  __builtin_aarch64_simd_oi __o;					     \
+  largetype __temp;							     \
+  __temp.val[0]								     \
+    = vcombine_##funcsuffix (__b.val[0],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[1]								     \
+    = vcombine_##funcsuffix (__b.val[1],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __o = __builtin_aarch64_set_qregoi##mode (__o,			     \
+					    (signedtype) __temp.val[0], 0);  \
+  __o = __builtin_aarch64_set_qregoi##mode (__o,			     \
+					    (signedtype) __temp.val[1], 1);  \
+  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
+				     __ptr, __o, __c);			     \
+}
 
-#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,			\
-			lnsuffix, funcsuffix, Q)			\
-  __extension__ static __inline rettype					\
-  __attribute__ ((__always_inline__))					\
-  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
-				     rettype b, const int c)		\
-  {									\
-    rettype result;							\
-    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
-	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"	\
-	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
-	     : "=Q"(result)						\
-	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
-	     : "memory", "v16", "v17");					\
-    return result;							\
-  }
+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
+		 float32x4_t)
+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
+		 float64x2_t)
+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
+		 int16x8_t)
+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
+		 int16x8_t)
+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
+		 int32x4_t)
+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
+		 int64x2_t)
 
-__LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
-__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
-__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
-__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
-__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
-__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
-__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
-__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
-__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
-__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
-__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
-__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
-__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
-__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
-__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
-__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
-__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
-__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
-__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
-__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
-__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
-__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
-__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
-__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
+#undef __ST2_LANE_FUNC
+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
+__extension__ static __inline void					    \
+__attribute__ ((__always_inline__))					    \
+vst2q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
+			   intype __b, const int __c)			    \
+{									    \
+  union { intype __i;							    \
+	  __builtin_aarch64_simd_oi __o; } __temp = { __b };		    \
+  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+				    __ptr, __temp.__o, __c);		    \
+}
 
-#define __LD3R_FUNC(rettype, structtype, ptrtype,			\
-		    regsuffix, funcsuffix, Q)				\
-  __extension__ static __inline rettype					\
-  __attribute__ ((__always_inline__))					\
-  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
-  {									\
-    rettype result;							\
-    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
-	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
-	     : "=Q"(result)						\
-	     : "Q"(*(const structtype *)ptr)				\
-	     : "memory", "v16", "v17", "v18");				\
-    return result;							\
-  }
+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
 
-__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
-__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
-__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
-__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
-__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
-__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
-__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
-__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
-__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
-__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
-__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
-__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
-__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
-__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
-__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
-__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
-__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
-__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
-__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
-__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
-__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
-__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
-__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
-__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
+#define __ST3_LANE_FUNC(intype, largetype, ptrtype,			     \
+			mode, ptr_mode, funcsuffix, signedtype)		     \
+__extension__ static __inline void					     \
+__attribute__ ((__always_inline__))					     \
+vst3_lane_ ## funcsuffix (ptrtype *__ptr,				     \
+			  intype __b, const int __c)			     \
+{									     \
+  __builtin_aarch64_simd_ci __o;					     \
+  largetype __temp;							     \
+  __temp.val[0]								     \
+    = vcombine_##funcsuffix (__b.val[0],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[1]								     \
+    = vcombine_##funcsuffix (__b.val[1],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[2]								     \
+    = vcombine_##funcsuffix (__b.val[2],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __o = __builtin_aarch64_set_qregci##mode (__o,			     \
+					    (signedtype) __temp.val[0], 0);  \
+  __o = __builtin_aarch64_set_qregci##mode (__o,			     \
+					    (signedtype) __temp.val[1], 1);  \
+  __o = __builtin_aarch64_set_qregci##mode (__o,			     \
+					    (signedtype) __temp.val[2], 2);  \
+  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
+				     __ptr, __o, __c);			     \
+}
 
-#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix,			\
-			lnsuffix, funcsuffix, Q)			\
-  __extension__ static __inline rettype					\
-  __attribute__ ((__always_inline__))					\
-  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
-				     rettype b, const int c)		\
-  {									\
-    rettype result;							\
-    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
-	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t"	\
-	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
-	     : "=Q"(result)						\
-	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
-	     : "memory", "v16", "v17", "v18");				\
-    return result;							\
-  }
+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
+		 float32x4_t)
+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
+		 float64x2_t)
+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
+		 int16x8_t)
+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
+		 int16x8_t)
+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
+		 int32x4_t)
+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
+		 int64x2_t)
 
-__LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
-__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
-__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
-__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
-__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
-__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
-__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
-__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
-__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
-__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
-__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
-__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
-__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
-__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
-__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
-__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
-__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
-__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
-__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
-__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
-__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
-__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
-__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
-__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
+#undef __ST3_LANE_FUNC
+#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
+__extension__ static __inline void					    \
+__attribute__ ((__always_inline__))					    \
+vst3q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
+			   intype __b, const int __c)			    \
+{									    \
+  union { intype __i;							    \
+	  __builtin_aarch64_simd_ci __o; } __temp = { __b };		    \
+  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+				    __ptr, __temp.__o, __c);		    \
+}
 
-#define __LD4R_FUNC(rettype, structtype, ptrtype,			\
-		    regsuffix, funcsuffix, Q)				\
-  __extension__ static __inline rettype					\
-  __attribute__ ((__always_inline__))					\
-  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
-  {									\
-    rettype result;							\
-    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
-	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
-	     : "=Q"(result)						\
-	     : "Q"(*(const structtype *)ptr)				\
-	     : "memory", "v16", "v17", "v18", "v19");			\
-    return result;							\
-  }
+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
 
-__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
-__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
-__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
-__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
-__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
-__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
-__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
-__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
-__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
-__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
-__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
-__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
-__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
-__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
-__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
-__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
-__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
-__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
-__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
-__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
-__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
-__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
-__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
-__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
+#define __ST4_LANE_FUNC(intype, largetype, ptrtype,			     \
+			mode, ptr_mode, funcsuffix, signedtype)		     \
+__extension__ static __inline void					     \
+__attribute__ ((__always_inline__))					     \
+vst4_lane_ ## funcsuffix (ptrtype *__ptr,				     \
+			  intype __b, const int __c)			     \
+{									     \
+  __builtin_aarch64_simd_xi __o;					     \
+  largetype __temp;							     \
+  __temp.val[0]								     \
+    = vcombine_##funcsuffix (__b.val[0],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[1]								     \
+    = vcombine_##funcsuffix (__b.val[1],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[2]								     \
+    = vcombine_##funcsuffix (__b.val[2],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __temp.val[3]								     \
+    = vcombine_##funcsuffix (__b.val[3],				     \
+			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
+					    (signedtype) __temp.val[0], 0);  \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
+					    (signedtype) __temp.val[1], 1);  \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
+					    (signedtype) __temp.val[2], 2);  \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
+					    (signedtype) __temp.val[3], 3);  \
+  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
+				     __ptr, __o, __c);			     \
+}
 
-#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,			\
-			lnsuffix, funcsuffix, Q)			\
-  __extension__ static __inline rettype					\
-  __attribute__ ((__always_inline__))					\
-  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
-				     rettype b, const int c)		\
-  {									\
-    rettype result;							\
-    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
-	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"	\
-	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
-	     : "=Q"(result)						\
-	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
-	     : "memory", "v16", "v17", "v18", "v19");			\
-    return result;							\
-  }
+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
+		 float32x4_t)
+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
+		 float64x2_t)
+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
+		 int16x8_t)
+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
+		 int16x8_t)
+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
+		 int32x4_t)
+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
+		 int64x2_t)
 
-__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
-__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
-__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
-__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
-__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
-__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
-__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
-__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
-__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
-__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
-__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
-__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
-__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
-__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
-__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
-__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
-__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
-__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
-__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
-__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
-__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
-__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
-__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
-__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
+#undef __ST4_LANE_FUNC
+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
+__extension__ static __inline void					    \
+__attribute__ ((__always_inline__))					    \
+vst4q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
+			   intype __b, const int __c)			    \
+{									    \
+  union { intype __i;							    \
+	  __builtin_aarch64_simd_xi __o; } __temp = { __b };		    \
+  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
+				    __ptr, __temp.__o, __c);		    \
+}
 
-#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,			\
-			lnsuffix, funcsuffix, Q)			\
-  typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype;	\
-  __extension__ static __inline void					\
-  __attribute__ ((__always_inline__))					\
-  vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
-				     intype b, const int c)		\
-  {									\
-    __ST2_LANE_STRUCTURE_##intype *__p =				\
-				(__ST2_LANE_STRUCTURE_##intype *)ptr;	\
-    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
-	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"	\
-	     : "=Q"(*__p)						\
-	     : "Q"(b), "i"(c)						\
-	     : "v16", "v17");						\
-  }
+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
 
-__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
-__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
-__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
-__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
-__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
-__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
-__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
-__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
-__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
-__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
-__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
-__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
-__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
-__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
-__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
-__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
-__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
-__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
-__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
-__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
-__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
-__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
-__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
-__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
-
-#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix,			\
-			lnsuffix, funcsuffix, Q)			\
-  typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype;	\
-  __extension__ static __inline void					\
-  __attribute__ ((__always_inline__))					\
-  vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
-				     intype b, const int c)		\
-  {									\
-    __ST3_LANE_STRUCTURE_##intype *__p =				\
-				(__ST3_LANE_STRUCTURE_##intype *)ptr;	\
-    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
-	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t"	\
-	     : "=Q"(*__p)						\
-	     : "Q"(b), "i"(c)						\
-	     : "v16", "v17", "v18");					\
-  }
-
-__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
-__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
-__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
-__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
-__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
-__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
-__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
-__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
-__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
-__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
-__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
-__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
-__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
-__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
-__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
-__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
-__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
-__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
-__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
-__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
-__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
-__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
-__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
-__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
-
-#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix,			\
-			lnsuffix, funcsuffix, Q)			\
-  typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype;	\
-  __extension__ static __inline void					\
-  __attribute__ ((__always_inline__))					\
-  vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr,			\
-				     intype b, const int c)		\
-  {									\
-    __ST4_LANE_STRUCTURE_##intype *__p =				\
-				(__ST4_LANE_STRUCTURE_##intype *)ptr;	\
-    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
-	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t"	\
-	     : "=Q"(*__p)						\
-	     : "Q"(b), "i"(c)						\
-	     : "v16", "v17", "v18", "v19");				\
-  }
-
-__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
-__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
-__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
-__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
-__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
-__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
-__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
-__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
-__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
-__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
-__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
-__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
-__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
-__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
-__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
-__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
-__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
-__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
-__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
-__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
-__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
-__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
-__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
-__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
-
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vaddlv_s32 (int32x2_t a)
 {
@@ -14341,12 +12147,6 @@
   return result;
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vpaddd_s64 (int64x2_t __a)
-{
-  return __builtin_aarch64_addpdi (__a);
-}
-
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
 {
@@ -15310,121 +13110,103 @@
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v2si (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vaddv_u8 (uint8x8_t __a)
 {
-  return vget_lane_u8 ((uint8x8_t)
-		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
-		0);
+  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vaddv_u16 (uint16x4_t __a)
 {
-  return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
-		0);
+  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vaddv_u32 (uint32x2_t __a)
 {
-  return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
-		0);
+  return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
-			0);
+  return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v4si (__a);
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vaddvq_s64 (int64x2_t __a)
 {
-  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
+  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vaddvq_u8 (uint8x16_t __a)
 {
-  return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
-		0);
+  return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vaddvq_u16 (uint16x8_t __a)
 {
-  return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
-		0);
+  return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vaddvq_u32 (uint32x4_t __a)
 {
-  return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
-		0);
+  return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a);
 }
 
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vaddvq_u64 (uint64x2_t __a)
 {
-  return vgetq_lane_u64 ((uint64x2_t)
-		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
-		0);
+  return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddv_f32 (float32x2_t __a)
 {
-  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
-  return vget_lane_f32 (__t, 0);
+  return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddvq_f32 (float32x4_t __a)
 {
-  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
-  return vgetq_lane_f32 (__t, 0);
+  return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vaddvq_f64 (float64x2_t __a)
 {
-  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
-  return vgetq_lane_f64 (__t, 0);
+  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
 }
 
 /* vbsl  */
@@ -15706,7 +13488,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceq_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
+  return (uint32x2_t) (__a == __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -15718,26 +13500,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return (uint8x8_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceq_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
+  return (uint8x8_t) (__a == __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vceq_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
+  return (uint16x4_t) (__a == __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceq_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
+  return (uint32x2_t) (__a == __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -15749,22 +13530,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return (__a == __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
+  return (__a == __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
+  return (__a == __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -15776,72 +13554,67 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vceqq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
+  return (uint32x4_t) (__a == __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vceqq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
+  return (uint64x2_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return (uint8x16_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vceqq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
+  return (uint8x16_t) (__a == __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vceqq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
+  return (uint16x8_t) (__a == __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vceqq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
+  return (uint32x4_t) (__a == __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vceqq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
+  return (uint64x2_t) (__a == __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return (__a == __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
+  return (__a == __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
+  return (__a == __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
+  return (__a == __b);
 }
 
 /* vceq - scalar.  */
@@ -15875,8 +13648,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceqz_f32 (float32x2_t __a)
 {
-  float32x2_t __b = {0.0f, 0.0f};
-  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
+  return (uint32x2_t) (__a == 0.0f);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -15888,30 +13660,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceqz_p8 (poly8x8_t __a)
 {
-  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return (uint8x8_t) (__a == 0);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceqz_s8 (int8x8_t __a)
 {
-  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
+  return (uint8x8_t) (__a == 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vceqz_s16 (int16x4_t __a)
 {
-  int16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
+  return (uint16x4_t) (__a == 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceqz_s32 (int32x2_t __a)
 {
-  int32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
+  return (uint32x2_t) (__a == 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -15923,25 +13690,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vceqz_u8 (uint8x8_t __a)
 {
-  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return (__a == 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vceqz_u16 (uint16x4_t __a)
 {
-  uint16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
+  return (__a == 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vceqz_u32 (uint32x2_t __a)
 {
-  uint32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
+  return (__a == 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -15953,86 +13714,67 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vceqzq_f32 (float32x4_t __a)
 {
-  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
+  return (uint32x4_t) (__a == 0.0f);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vceqzq_f64 (float64x2_t __a)
 {
-  float64x2_t __b = {0.0, 0.0};
-  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
+  return (uint64x2_t) (__a == 0.0f);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vceqzq_p8 (poly8x16_t __a)
 {
-  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return (uint8x16_t) (__a == 0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vceqzq_s8 (int8x16_t __a)
 {
-  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		   0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
+  return (uint8x16_t) (__a == 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vceqzq_s16 (int16x8_t __a)
 {
-  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
+  return (uint16x8_t) (__a == 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vceqzq_s32 (int32x4_t __a)
 {
-  int32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
+  return (uint32x4_t) (__a == 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vceqzq_s64 (int64x2_t __a)
 {
-  int64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
+  return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vceqzq_u8 (uint8x16_t __a)
 {
-  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return (__a == 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vceqzq_u16 (uint16x8_t __a)
 {
-  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
+  return (__a == 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vceqzq_u32 (uint32x4_t __a)
 {
-  uint32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
+  return (__a == 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vceqzq_u64 (uint64x2_t __a)
 {
-  uint64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
+  return (__a == __AARCH64_UINT64_C (0));
 }
 
 /* vceqz - scalar.  */
@@ -16066,7 +13808,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcge_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
+  return (uint32x2_t) (__a >= __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16076,28 +13818,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcge_p8 (poly8x8_t __a, poly8x8_t __b)
-{
-  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcge_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
+  return (uint8x8_t) (__a >= __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcge_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
+  return (uint16x4_t) (__a >= __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcge_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
+  return (uint32x2_t) (__a >= __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16109,22 +13844,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return (__a >= __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
+  return (__a >= __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
+  return (__a >= __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16136,72 +13868,61 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
+  return (uint32x4_t) (__a >= __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
+  return (uint64x2_t) (__a >= __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
-{
-  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
+  return (uint8x16_t) (__a >= __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
+  return (uint16x8_t) (__a >= __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
+  return (uint32x4_t) (__a >= __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
+  return (uint64x2_t) (__a >= __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return (__a >= __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
+  return (__a >= __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
+  return (__a >= __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
+  return (__a >= __b);
 }
 
 /* vcge - scalar.  */
@@ -16235,8 +13956,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgez_f32 (float32x2_t __a)
 {
-  float32x2_t __b = {0.0f, 0.0f};
-  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
+  return (uint32x2_t) (__a >= 0.0f);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16246,32 +13966,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcgez_p8 (poly8x8_t __a)
-{
-  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgez_s8 (int8x8_t __a)
 {
-  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
+  return (uint8x8_t) (__a >= 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgez_s16 (int16x4_t __a)
 {
-  int16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
+  return (uint16x4_t) (__a >= 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgez_s32 (int32x2_t __a)
 {
-  int32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
+  return (uint32x2_t) (__a >= 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16280,121 +13989,42 @@
   return __a >= 0ll ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcgez_u8 (uint8x8_t __a)
-{
-  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vcgez_u16 (uint16x4_t __a)
-{
-  uint16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcgez_u32 (uint32x2_t __a)
-{
-  uint32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgez_u64 (uint64x1_t __a)
-{
-  return __a >= 0ll ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgezq_f32 (float32x4_t __a)
 {
-  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
+  return (uint32x4_t) (__a >= 0.0f);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgezq_f64 (float64x2_t __a)
 {
-  float64x2_t __b = {0.0, 0.0};
-  return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
+  return (uint64x2_t) (__a >= 0.0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcgezq_p8 (poly8x16_t __a)
-{
-  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgezq_s8 (int8x16_t __a)
 {
-  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		   0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
+  return (uint8x16_t) (__a >= 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgezq_s16 (int16x8_t __a)
 {
-  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
+  return (uint16x8_t) (__a >= 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgezq_s32 (int32x4_t __a)
 {
-  int32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
+  return (uint32x4_t) (__a >= 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgezq_s64 (int64x2_t __a)
 {
-  int64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
+  return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
 }
 
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcgezq_u8 (uint8x16_t __a)
-{
-  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vcgezq_u16 (uint16x8_t __a)
-{
-  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcgezq_u32 (uint32x4_t __a)
-{
-  uint32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcgezq_u64 (uint64x2_t __a)
-{
-  uint64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
-}
-
 /* vcgez - scalar.  */
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
@@ -16409,12 +14039,6 @@
   return __a >= 0 ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgezd_u64 (int64x1_t __a)
-{
-  return __a >= 0 ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vcgezd_f64 (float64_t __a)
 {
@@ -16426,7 +14050,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgt_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
+  return (uint32x2_t) (__a > __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16436,28 +14060,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
-{
-  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgt_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
+  return (uint8x8_t) (__a > __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgt_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
+  return (uint16x4_t) (__a > __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgt_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
+  return (uint32x2_t) (__a > __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16469,22 +14086,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return (__a > __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
+  return (__a > __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
+  return (__a > __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16496,72 +14110,61 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
+  return (uint32x4_t) (__a > __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
+  return (uint64x2_t) (__a > __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
-{
-  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
+  return (uint8x16_t) (__a > __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
+  return (uint16x8_t) (__a > __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
+  return (uint32x4_t) (__a > __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
+  return (uint64x2_t) (__a > __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return (__a > __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
+  return (__a > __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
+  return (__a > __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
+  return (__a > __b);
 }
 
 /* vcgt - scalar.  */
@@ -16595,8 +14198,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgtz_f32 (float32x2_t __a)
 {
-  float32x2_t __b = {0.0f, 0.0f};
-  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
+  return (uint32x2_t) (__a > 0.0f);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16606,32 +14208,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcgtz_p8 (poly8x8_t __a)
-{
-  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgtz_s8 (int8x8_t __a)
 {
-  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
+  return (uint8x8_t) (__a > 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgtz_s16 (int16x4_t __a)
 {
-  int16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
+  return (uint16x4_t) (__a > 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgtz_s32 (int32x2_t __a)
 {
-  int32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
+  return (uint32x2_t) (__a > 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16640,121 +14231,42 @@
   return __a > 0ll ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcgtz_u8 (uint8x8_t __a)
-{
-  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vcgtz_u16 (uint16x4_t __a)
-{
-  uint16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcgtz_u32 (uint32x2_t __a)
-{
-  uint32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgtz_u64 (uint64x1_t __a)
-{
-  return __a > 0ll ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtzq_f32 (float32x4_t __a)
 {
-  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
+  return (uint32x4_t) (__a > 0.0f);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtzq_f64 (float64x2_t __a)
 {
-  float64x2_t __b = {0.0, 0.0};
-  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
+    return (uint64x2_t) (__a > 0.0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcgtzq_p8 (poly8x16_t __a)
-{
-  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgtzq_s8 (int8x16_t __a)
 {
-  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		   0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
+  return (uint8x16_t) (__a > 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgtzq_s16 (int16x8_t __a)
 {
-  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
+  return (uint16x8_t) (__a > 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtzq_s32 (int32x4_t __a)
 {
-  int32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
+  return (uint32x4_t) (__a > 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtzq_s64 (int64x2_t __a)
 {
-  int64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
+  return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
 }
 
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcgtzq_u8 (uint8x16_t __a)
-{
-  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-vcgtzq_u16 (uint16x8_t __a)
-{
-  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcgtzq_u32 (uint32x4_t __a)
-{
-  uint32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcgtzq_u64 (uint64x2_t __a)
-{
-  uint64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
-}
-
 /* vcgtz - scalar.  */
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
@@ -16769,12 +14281,6 @@
   return __a > 0 ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgtzd_u64 (int64x1_t __a)
-{
-  return __a > 0 ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vcgtzd_f64 (float64_t __a)
 {
@@ -16786,7 +14292,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcle_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
+  return (uint32x2_t) (__a <= __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16796,28 +14302,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcle_p8 (poly8x8_t __a, poly8x8_t __b)
-{
-  return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
-						 (int8x8_t) __a);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcle_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
+  return (uint8x8_t) (__a <= __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcle_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
+  return (uint16x4_t) (__a <= __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcle_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
+  return (uint32x2_t) (__a <= __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16829,22 +14328,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
-						 (int8x8_t) __a);
+  return (__a <= __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
-						  (int16x4_t) __a);
+  return (__a <= __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
-						  (int32x2_t) __a);
+  return (__a <= __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16856,72 +14352,61 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcleq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
+  return (uint32x4_t) (__a <= __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcleq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
+  return (uint64x2_t) (__a <= __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
-{
-  return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
-						   (int8x16_t) __a);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcleq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
+  return (uint8x16_t) (__a <= __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcleq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
+  return (uint16x8_t) (__a <= __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcleq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
+  return (uint32x4_t) (__a <= __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcleq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
+  return (uint64x2_t) (__a <= __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
-						   (int8x16_t) __a);
+  return (__a <= __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
-						  (int16x8_t) __a);
+  return (__a <= __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
-						  (int32x4_t) __a);
+  return (__a <= __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
-						  (int64x2_t) __a);
+  return (__a <= __b);
 }
 
 /* vcle - scalar.  */
@@ -16955,8 +14440,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclez_f32 (float32x2_t __a)
 {
-  float32x2_t __b = {0.0f, 0.0f};
-  return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
+  return (uint32x2_t) (__a <= 0.0f);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -16966,32 +14450,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vclez_p8 (poly8x8_t __a)
-{
-  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vclez_s8 (int8x8_t __a)
 {
-  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
+  return (uint8x8_t) (__a <= 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vclez_s16 (int16x4_t __a)
 {
-  int16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
+  return (uint16x4_t) (__a <= 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclez_s32 (int32x2_t __a)
 {
-  int32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
+  return (uint32x2_t) (__a <= 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -17000,62 +14473,40 @@
   return __a <= 0ll ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vclez_u64 (uint64x1_t __a)
-{
-  return __a <= 0ll ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vclezq_f32 (float32x4_t __a)
 {
-  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-  return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
+  return (uint32x4_t) (__a <= 0.0f);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vclezq_f64 (float64x2_t __a)
 {
-  float64x2_t __b = {0.0, 0.0};
-  return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
+  return (uint64x2_t) (__a <= 0.0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vclezq_p8 (poly8x16_t __a)
-{
-  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vclezq_s8 (int8x16_t __a)
 {
-  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		   0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
+  return (uint8x16_t) (__a <= 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vclezq_s16 (int16x8_t __a)
 {
-  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
+  return (uint16x8_t) (__a <= 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vclezq_s32 (int32x4_t __a)
 {
-  int32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
+  return (uint32x4_t) (__a <= 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vclezq_s64 (int64x2_t __a)
 {
-  int64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
+  return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
 }
 
 /* vclez - scalar.  */
@@ -17072,12 +14523,6 @@
   return __a <= 0 ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vclezd_u64 (int64x1_t __a)
-{
-  return __a <= 0 ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vclezd_f64 (float64_t __a)
 {
@@ -17089,7 +14534,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclt_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
+  return (uint32x2_t) (__a < __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -17099,28 +14544,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vclt_p8 (poly8x8_t __a, poly8x8_t __b)
-{
-  return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
-						 (int8x8_t) __a);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vclt_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
+  return (uint8x8_t) (__a < __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vclt_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
+  return (uint16x4_t) (__a < __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclt_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
+  return (uint32x2_t) (__a < __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -17132,22 +14570,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
-						 (int8x8_t) __a);
+  return (__a < __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
-						  (int16x4_t) __a);
+  return (__a < __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
-						  (int32x2_t) __a);
+  return (__a < __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -17159,72 +14594,61 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
+  return (uint32x4_t) (__a < __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
+  return (uint64x2_t) (__a < __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
-{
-  return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
-						   (int8x16_t) __a);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcltq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
+  return (uint8x16_t) (__a < __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcltq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
+  return (uint16x8_t) (__a < __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
+  return (uint32x4_t) (__a < __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
+  return (uint64x2_t) (__a < __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
-						   (int8x16_t) __a);
+  return (__a < __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
-						  (int16x8_t) __a);
+  return (__a < __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
-						  (int32x4_t) __a);
+  return (__a < __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
-						  (int64x2_t) __a);
+  return (__a < __b);
 }
 
 /* vclt - scalar.  */
@@ -17258,8 +14682,7 @@
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcltz_f32 (float32x2_t __a)
 {
-  float32x2_t __b = {0.0f, 0.0f};
-  return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
+  return (uint32x2_t) (__a < 0.0f);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -17269,32 +14692,21 @@
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vcltz_p8 (poly8x8_t __a)
-{
-  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcltz_s8 (int8x8_t __a)
 {
-  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
+  return (uint8x8_t) (__a < 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcltz_s16 (int16x4_t __a)
 {
-  int16x4_t __b = {0, 0, 0, 0};
-  return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
+  return (uint16x4_t) (__a < 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcltz_s32 (int32x2_t __a)
 {
-  int32x2_t __b = {0, 0};
-  return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
+  return (uint32x2_t) (__a < 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -17306,53 +14718,37 @@
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltzq_f32 (float32x4_t __a)
 {
-  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-  return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
+  return (uint32x4_t) (__a < 0.0f);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltzq_f64 (float64x2_t __a)
 {
-  float64x2_t __b = {0.0, 0.0};
-  return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
+  return (uint64x2_t) (__a < 0.0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vcltzq_p8 (poly8x16_t __a)
-{
-  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		    0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcltzq_s8 (int8x16_t __a)
 {
-  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-		   0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
+  return (uint8x16_t) (__a < 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcltzq_s16 (int16x8_t __a)
 {
-  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-  return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
+  return (uint16x8_t) (__a < 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltzq_s32 (int32x4_t __a)
 {
-  int32x4_t __b = {0, 0, 0, 0};
-  return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
+  return (uint32x4_t) (__a < 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltzq_s64 (int64x2_t __a)
 {
-  int64x2_t __b = {0, 0};
-  return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
+  return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
 }
 
 /* vcltz - scalar.  */
@@ -17369,12 +14765,6 @@
   return __a < 0 ? -1ll : 0ll;
 }
 
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcltzd_u64 (int64x1_t __a)
-{
-  return __a < 0 ? -1ll : 0ll;
-}
-
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vcltzd_f64 (float64_t __a)
 {
@@ -18483,6 +15873,292 @@
   return __aarch64_vgetq_lane_u64 (__a, __b);
 }
 
+/* vext  */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
+{
+  /* The only possible index to the assembler instruction returns element 0.  */
+  __builtin_aarch64_im_lane_boundsi (__c, 1);
+  return __a;
+}
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint8x8_t)
+      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a,
+      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint8x8_t)
+      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a,
+      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
+{
+  /* The only possible index to the assembler instruction returns element 0.  */
+  __builtin_aarch64_im_lane_boundsi (__c, 1);
+  return __a;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint8x8_t)
+      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a,
+      (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
+{
+  /* The only possible index to the assembler instruction returns element 0.  */
+  __builtin_aarch64_im_lane_boundsi (__c, 1);
+  return __a;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a,
+      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 16);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint8x16_t)
+      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
+       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
+       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint16x8_t)
+      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 16);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint8x16_t)
+      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
+       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
+       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint16x8_t)
+      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a,
+      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 16);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint8x16_t)
+      {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
+       24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
+       __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint16x8_t)
+      {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a,
+      (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
+{
+  __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
+
 /* vfma_lane  */
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -19712,6 +17388,1234 @@
   return ret;
 }
 
+/* vldn_dup */
+
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vld2_dup_s8 (const int8_t * __a)
+{
+  int8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_s16 (const int16_t * __a)
+{
+  int16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
+  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vld2_dup_s32 (const int32_t * __a)
+{
+  int32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
+  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vld2_dup_f32 (const float32_t * __a)
+{
+  float32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
+  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
+vld2_dup_f64 (const float64_t * __a)
+{
+  float64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
+  return ret;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vld2_dup_u8 (const uint8_t * __a)
+{
+  uint8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_u16 (const uint16_t * __a)
+{
+  uint16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
+  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vld2_dup_u32 (const uint32_t * __a)
+{
+  uint32x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
+  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vld2_dup_p8 (const poly8_t * __a)
+{
+  poly8x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
+  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_p16 (const poly16_t * __a)
+{
+  poly16x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
+  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
+vld2_dup_s64 (const int64_t * __a)
+{
+  int64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
+  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
+vld2_dup_u64 (const uint64_t * __a)
+{
+  uint64x1x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
+  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
+vld2q_dup_s8 (const int8_t * __a)
+{
+  int8x16x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
+  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
+vld2q_dup_p8 (const poly8_t * __a)
+{
+  poly8x16x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
+  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
+vld2q_dup_s16 (const int16_t * __a)
+{
+  int16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
+  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
+vld2q_dup_p16 (const poly16_t * __a)
+{
+  poly16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
+  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
+vld2q_dup_s32 (const int32_t * __a)
+{
+  int32x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
+  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
+vld2q_dup_s64 (const int64_t * __a)
+{
+  int64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
+  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
+vld2q_dup_u8 (const uint8_t * __a)
+{
+  uint8x16x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
+  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
+vld2q_dup_u16 (const uint16_t * __a)
+{
+  uint16x8x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
+  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
+vld2q_dup_u32 (const uint32_t * __a)
+{
+  uint32x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
+  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
+vld2q_dup_u64 (const uint64_t * __a)
+{
+  uint64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
+  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
+vld2q_dup_f32 (const float32_t * __a)
+{
+  float32x4x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
+  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
+vld2q_dup_f64 (const float64_t * __a)
+{
+  float64x2x2_t ret;
+  __builtin_aarch64_simd_oi __o;
+  __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
+  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
+  return ret;
+}
+
+__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
+vld3_dup_s64 (const int64_t * __a)
+{
+  int64x1x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
+  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
+  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
+vld3_dup_u64 (const uint64_t * __a)
+{
+  uint64x1x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
+  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
+  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
+vld3_dup_f64 (const float64_t * __a)
+{
+  float64x1x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
+  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
+  return ret;
+}
+
+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
+vld3_dup_s8 (const int8_t * __a)
+{
+  int8x8x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
+  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
+  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
+vld3_dup_p8 (const poly8_t * __a)
+{
+  poly8x8x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
+  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
+  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_s16 (const int16_t * __a)
+{
+  int16x4x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
+  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
+  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_p16 (const poly16_t * __a)
+{
+  poly16x4x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
+  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
+  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
+vld3_dup_s32 (const int32_t * __a)
+{
+  int32x2x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
+  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
+  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
+vld3_dup_u8 (const uint8_t * __a)
+{
+  uint8x8x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
+  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
+  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_u16 (const uint16_t * __a)
+{
+  uint16x4x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
+  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
+  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
+vld3_dup_u32 (const uint32_t * __a)
+{
+  uint32x2x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
+  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
+  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
+vld3_dup_f32 (const float32_t * __a)
+{
+  float32x2x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
+  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
+  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
+vld3q_dup_s8 (const int8_t * __a)
+{
+  int8x16x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
+  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
+  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
+vld3q_dup_p8 (const poly8_t * __a)
+{
+  poly8x16x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
+  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
+  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
+vld3q_dup_s16 (const int16_t * __a)
+{
+  int16x8x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
+  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
+  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
+vld3q_dup_p16 (const poly16_t * __a)
+{
+  poly16x8x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
+  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
+  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
+vld3q_dup_s32 (const int32_t * __a)
+{
+  int32x4x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
+  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
+  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
+vld3q_dup_s64 (const int64_t * __a)
+{
+  int64x2x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
+  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
+  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
+vld3q_dup_u8 (const uint8_t * __a)
+{
+  uint8x16x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
+  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
+  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
+vld3q_dup_u16 (const uint16_t * __a)
+{
+  uint16x8x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
+  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
+  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
+vld3q_dup_u32 (const uint32_t * __a)
+{
+  uint32x4x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
+  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
+  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
+vld3q_dup_u64 (const uint64_t * __a)
+{
+  uint64x2x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
+  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
+  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
+vld3q_dup_f32 (const float32_t * __a)
+{
+  float32x4x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
+  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
+  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
+vld3q_dup_f64 (const float64_t * __a)
+{
+  float64x2x3_t ret;
+  __builtin_aarch64_simd_ci __o;
+  __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
+  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
+  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
+  return ret;
+}
+
+__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
+vld4_dup_s64 (const int64_t * __a)
+{
+  int64x1x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
+  ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
+  ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
+  ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
+vld4_dup_u64 (const uint64_t * __a)
+{
+  uint64x1x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
+  ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
+  ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
+  ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
+vld4_dup_f64 (const float64_t * __a)
+{
+  float64x1x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
+  ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
+  ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
+  ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
+  return ret;
+}
+
+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
+vld4_dup_s8 (const int8_t * __a)
+{
+  int8x8x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
+  ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
+  ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
+  ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
+vld4_dup_p8 (const poly8_t * __a)
+{
+  poly8x8x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
+  ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
+  ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
+  ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_s16 (const int16_t * __a)
+{
+  int16x4x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
+  ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
+  ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
+  ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_p16 (const poly16_t * __a)
+{
+  poly16x4x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
+  ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
+  ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
+  ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
+vld4_dup_s32 (const int32_t * __a)
+{
+  int32x2x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
+  ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
+  ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
+  ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
+vld4_dup_u8 (const uint8_t * __a)
+{
+  uint8x8x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
+  ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
+  ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
+  ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_u16 (const uint16_t * __a)
+{
+  uint16x4x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
+  ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
+  ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
+  ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
+vld4_dup_u32 (const uint32_t * __a)
+{
+  uint32x2x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
+  ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
+  ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
+  ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
+vld4_dup_f32 (const float32_t * __a)
+{
+  float32x2x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
+  ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
+  ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
+  ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
+vld4q_dup_s8 (const int8_t * __a)
+{
+  int8x16x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
+  ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
+  ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
+  ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
+vld4q_dup_p8 (const poly8_t * __a)
+{
+  poly8x16x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
+  ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
+  ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
+  ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
+vld4q_dup_s16 (const int16_t * __a)
+{
+  int16x8x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
+  ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
+  ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
+  ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
+vld4q_dup_p16 (const poly16_t * __a)
+{
+  poly16x8x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
+  ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
+  ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
+  ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
+vld4q_dup_s32 (const int32_t * __a)
+{
+  int32x4x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
+  ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
+  ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
+  ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
+vld4q_dup_s64 (const int64_t * __a)
+{
+  int64x2x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
+  ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
+  ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
+  ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
+vld4q_dup_u8 (const uint8_t * __a)
+{
+  uint8x16x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
+  ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
+  ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
+  ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
+  ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
+vld4q_dup_u16 (const uint16_t * __a)
+{
+  uint16x8x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
+  ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
+  ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
+  ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
+  ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
+vld4q_dup_u32 (const uint32_t * __a)
+{
+  uint32x4x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
+  ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
+  ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
+  ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
+  ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
+vld4q_dup_u64 (const uint64_t * __a)
+{
+  uint64x2x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
+  ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
+  ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
+  ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
+  ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
+vld4q_dup_f32 (const float32_t * __a)
+{
+  float32x4x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
+  ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
+  ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
+  ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
+  ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
+  return ret;
+}
+
+__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
+vld4q_dup_f64 (const float64_t * __a)
+{
+  float64x2x4_t ret;
+  __builtin_aarch64_simd_xi __o;
+  __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
+  ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
+  ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
+  ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
+  ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
+  return ret;
+}
+
+/* vld2_lane */
+
+#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
+			 mode, ptrmode, funcsuffix, signedtype)		   \
+__extension__ static __inline intype __attribute__ ((__always_inline__))   \
+vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
+{									   \
+  __builtin_aarch64_simd_oi __o;					   \
+  largetype __temp;							   \
+  __temp.val[0] =							   \
+    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
+  __temp.val[1] =							   \
+    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
+  __o = __builtin_aarch64_set_qregoi##mode (__o,			   \
+					   (signedtype) __temp.val[0],	   \
+					   0);				   \
+  __o = __builtin_aarch64_set_qregoi##mode (__o,			   \
+					   (signedtype) __temp.val[1],	   \
+					   1);				   \
+  __o =	__builtin_aarch64_ld2_lane##mode (				   \
+	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
+  __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0);	   \
+  __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1);	   \
+  return __b;								   \
+}
+
+__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v4sf,
+		 sf, f32, float32x4_t)
+__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v16qi, qi, p8,
+		 int8x16_t)
+__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v8hi, hi,
+		 p16, int16x8_t)
+__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v16qi, qi, s8,
+		 int8x16_t)
+__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v8hi, hi, s16,
+		 int16x8_t)
+__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v4si, si, s32,
+		 int32x4_t)
+__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, v2di, di, s64,
+		 int64x2_t)
+__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v16qi, qi, u8,
+		 int8x16_t)
+__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v8hi, hi,
+		 u16, int16x8_t)
+__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v4si, si,
+		 u32, int32x4_t)
+__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, v2di, di,
+		 u64, int64x2_t)
+
+#undef __LD2_LANE_FUNC
+
+__extension__ static __inline float64x1x2_t
+__attribute__ ((__always_inline__))
+vld2_lane_f64 (const float64_t *ptr, float64x1x2_t b, const int c)
+{
+  float64x1x2_t result;
+  __asm__ ("ld1 {v16.1d, v17.1d}, %1\n\t"
+          "ld2 {v16.d, v17.d}[%3], %2\n\t"
+          "st1 {v16.1d, v17.1d}, %0\n\t"
+          : "=Q"(result)
+          : "Q"(b), "Q"(*(const float64x1x2_t *)ptr), "i"(c)
+          : "memory", "v16", "v17");
+  return result;
+}
+
+
+
+/* vld2q_lane */
+
+#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
+__extension__ static __inline intype __attribute__ ((__always_inline__))   \
+vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
+{									   \
+  __builtin_aarch64_simd_oi __o;					   \
+  intype ret;								   \
+  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \
+  __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \
+  __o = __builtin_aarch64_ld2_lane##mode (				   \
+	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
+  ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0);	   \
+  ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1);	   \
+  return ret;								   \
+}
+
+__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32)
+__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8)
+__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8)
+__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16)
+__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32)
+__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64)
+__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8)
+__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16)
+__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32)
+__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64)
+
+__extension__ static __inline float64x2x2_t
+__attribute__ ((__always_inline__))
+vld2q_lane_f64 (const float64_t *ptr, float64x2x2_t b, const int c)
+{
+  float64x2x2_t result;
+  __asm__ ("ld1 {v16.2d, v17.2d}, %1\n\t"
+          "ld2 {v16.d, v17.d}[%3], %2\n\t"
+          "st1 {v16.2d, v17.2d}, %0\n\t"
+          : "=Q"(result)
+          : "Q"(b), "Q"(*(const float64x2x2_t *)ptr), "i"(c)
+          : "memory", "v16", "v17");
+  return result;
+}
+
+
+#undef __LD2_LANE_FUNC
+
+/* vld3_lane */
+
+#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
+			 mode, ptrmode, funcsuffix, signedtype)		   \
+__extension__ static __inline intype __attribute__ ((__always_inline__))   \
+vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
+{									   \
+  __builtin_aarch64_simd_ci __o;					   \
+  largetype __temp;							   \
+  __temp.val[0] =							   \
+    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
+  __temp.val[1] =							   \
+    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
+  __temp.val[2] =							   \
+    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
+  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
+					   (signedtype) __temp.val[0],	   \
+					   0);				   \
+  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
+					   (signedtype) __temp.val[1],	   \
+					   1);				   \
+  __o = __builtin_aarch64_set_qregci##mode (__o,			   \
+					   (signedtype) __temp.val[2],	   \
+					   2);				   \
+  __o =	__builtin_aarch64_ld3_lane##mode (				   \
+	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
+  __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0);	   \
+  __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1);	   \
+  __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2);	   \
+  return __b;								   \
+}
+
+__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v4sf,
+		 sf, f32, float32x4_t)
+__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v16qi, qi, p8,
+		 int8x16_t)
+__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v8hi, hi,
+		 p16, int16x8_t)
+__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v16qi, qi, s8,
+		 int8x16_t)
+__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v8hi, hi, s16,
+		 int16x8_t)
+__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v4si, si, s32,
+		 int32x4_t)
+__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, v2di, di, s64,
+		 int64x2_t)
+__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v16qi, qi, u8,
+		 int8x16_t)
+__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v8hi, hi,
+		 u16, int16x8_t)
+__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v4si, si,
+		 u32, int32x4_t)
+__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, v2di, di,
+		 u64, int64x2_t)
+
+#undef __LD3_LANE_FUNC
+
+__extension__ static __inline float64x1x3_t
+__attribute__ ((__always_inline__))
+vld3_lane_f64 (const float64_t *ptr, float64x1x3_t b, const int c)
+{
+  float64x1x3_t result;
+  __asm__ ("ld1 {v16.1d - v18.1d}, %1\n\t"
+          "ld3 {v16.d - v18.d}[%3], %2\n\t"
+          "st1 {v16.1d - v18.1d}, %0\n\t"
+          : "=Q"(result)
+          : "Q"(b), "Q"(*(const float64x1x3_t *)ptr), "i"(c)
+          : "memory", "v16", "v17", "v18");
+  return result;
+}
+
+
+/* vld3q_lane */
+
+#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
+__extension__ static __inline intype __attribute__ ((__always_inline__))   \
+vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
+{									   \
+  __builtin_aarch64_simd_ci __o;					   \
+  intype ret;								   \
+  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \
+  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \
+  __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \
+  __o = __builtin_aarch64_ld3_lane##mode (				   \
+	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
+  ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0);	   \
+  ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1);	   \
+  ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2);	   \
+  return ret;								   \
+}
+
+__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32)
+__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8)
+__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8)
+__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16)
+__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32)
+__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64)
+__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8)
+__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16)
+__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32)
+__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64)
+
+__extension__ static __inline float64x2x3_t
+__attribute__ ((__always_inline__))
+vld3q_lane_f64 (const float64_t *ptr, float64x2x3_t b, const int c)
+{
+  float64x2x3_t result;
+  __asm__ ("ld1 {v16.2d - v18.2d}, %1\n\t"
+          "ld3 {v16.d - v18.d}[%3], %2\n\t"
+          "st1 {v16.2d - v18.2d}, %0\n\t"
+          : "=Q"(result)
+          : "Q"(b), "Q"(*(const float64x2x3_t *)ptr), "i"(c)
+          : "memory", "v16", "v17", "v18");
+  return result;
+}
+
+
+#undef __LD3_LANE_FUNC
+
+/* vld4_lane */
+
+#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype,		   \
+			 mode, ptrmode, funcsuffix, signedtype)		   \
+__extension__ static __inline intype __attribute__ ((__always_inline__))   \
+vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c)  \
+{									   \
+  __builtin_aarch64_simd_xi __o;					   \
+  largetype __temp;							   \
+  __temp.val[0] =							   \
+    vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0));	   \
+  __temp.val[1] =							   \
+    vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0));	   \
+  __temp.val[2] =							   \
+    vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0));	   \
+  __temp.val[3] =							   \
+    vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0));	   \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
+					   (signedtype) __temp.val[0],	   \
+					   0);				   \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
+					   (signedtype) __temp.val[1],	   \
+					   1);				   \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
+					   (signedtype) __temp.val[2],	   \
+					   2);				   \
+  __o = __builtin_aarch64_set_qregxi##mode (__o,			   \
+					   (signedtype) __temp.val[3],	   \
+					   3);				   \
+  __o =	__builtin_aarch64_ld4_lane##mode (				   \
+	  (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);	   \
+  __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0);	   \
+  __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1);	   \
+  __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2);	   \
+  __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3);	   \
+  return __b;								   \
+}
+
+/* vld4q_lane */
+
+__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v4sf,
+		 sf, f32, float32x4_t)
+__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v16qi, qi, p8,
+		 int8x16_t)
+__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v8hi, hi,
+		 p16, int16x8_t)
+__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v16qi, qi, s8,
+		 int8x16_t)
+__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v8hi, hi, s16,
+		 int16x8_t)
+__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v4si, si, s32,
+		 int32x4_t)
+__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, v2di, di, s64,
+		 int64x2_t)
+__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v16qi, qi, u8,
+		 int8x16_t)
+__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v8hi, hi,
+		 u16, int16x8_t)
+__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v4si, si,
+		 u32, int32x4_t)
+__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, v2di, di,
+		 u64, int64x2_t)
+
+#undef __LD4_LANE_FUNC
+
+__extension__ static __inline float64x1x4_t
+__attribute__ ((__always_inline__))
+vld4_lane_f64 (const float64_t *ptr, float64x1x4_t b, const int c)
+{
+  float64x1x4_t result;
+  __asm__ ("ld1 {v16.1d - v19.1d}, %1\n\t"
+          "ld4 {v16.d - v19.d}[%3], %2\n\t"
+          "st1 {v16.1d - v19.1d}, %0\n\t"
+          : "=Q"(result)
+          : "Q"(b), "Q"(*(const float64x1x4_t *)ptr), "i"(c)
+          : "memory", "v16", "v17", "v18", "v19");
+  return result;
+}
+
+
+/* vld4q_lane */
+
+#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \
+__extension__ static __inline intype __attribute__ ((__always_inline__))   \
+vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \
+{									   \
+  __builtin_aarch64_simd_xi __o;					   \
+  intype ret;								   \
+  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \
+  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \
+  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \
+  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \
+  __o = __builtin_aarch64_ld4_lane##mode (				   \
+	(__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c);		   \
+  ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0);	   \
+  ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1);	   \
+  ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2);	   \
+  ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3);	   \
+  return ret;								   \
+}
+
+__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32)
+__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8)
+__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16)
+__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8)
+__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16)
+__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32)
+__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64)
+__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8)
+__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16)
+__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32)
+__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64)
+
+__extension__ static __inline float64x2x4_t
+__attribute__ ((__always_inline__))
+vld4q_lane_f64 (const float64_t *ptr, float64x2x4_t b, const int c)
+{
+  float64x2x4_t result;
+  __asm__ ("ld1 {v16.2d - v19.2d}, %1\n\t"
+          "ld4 {v16.d - v19.d}[%3], %2\n\t"
+          "st1 {v16.2d - v19.2d}, %0\n\t"
+          : "=Q"(result)
+          : "Q"(b), "Q"(*(const float64x2x4_t *)ptr), "i"(c)
+          : "memory", "v16", "v17", "v18", "v19");
+  return result;
+}
+
+#undef __LD4_LANE_FUNC
+
 /* vmax */
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -19835,106 +18739,91 @@
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
-			0);
+  return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vmaxv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vmaxv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vmaxv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v2si (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vmaxv_u8 (uint8x8_t __a)
 {
-  return vget_lane_u8 ((uint8x8_t)
-		__builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vmaxv_u16 (uint16x4_t __a)
 {
-  return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vmaxv_u32 (uint32x2_t __a)
 {
-  return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
-			 0);
+  return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vmaxvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
-			 0);
+  return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vmaxvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vmaxvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vmaxvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v4si (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vmaxvq_u8 (uint8x16_t __a)
 {
-  return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vmaxvq_u16 (uint16x8_t __a)
 {
-  return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vmaxvq_u32 (uint32x4_t __a)
 {
-  return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
 }
 
 /* vmaxnmv  */
@@ -19942,20 +18831,19 @@
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxnmv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
-			0);
+  return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxnmvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vmaxnmvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
+  return __builtin_aarch64_reduc_smax_scal_v2df (__a);
 }
 
 /* vmin  */
@@ -20081,107 +18969,91 @@
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
-			0);
+  return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vminv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
-		       0);
+  return __builtin_aarch64_reduc_smin_scal_v8qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vminv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v4hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vminv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v2si (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vminv_u8 (uint8x8_t __a)
 {
-  return vget_lane_u8 ((uint8x8_t)
-		__builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vminv_u16 (uint16x4_t __a)
 {
-  return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vminv_u32 (uint32x2_t __a)
 {
-  return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
-			 0);
+  return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vminvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
-			 0);
+  return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vminvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v16qi (__a);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vminvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v8hi (__a);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vminvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v4si (__a);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vminvq_u8 (uint8x16_t __a)
 {
-  return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vminvq_u16 (uint16x8_t __a)
 {
-  return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vminvq_u32 (uint32x4_t __a)
 {
-  return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
-		0);
+  return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a);
 }
 
 /* vminnmv  */
@@ -20189,19 +19061,19 @@
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminnmv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v2sf (__a);
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminnmvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v4sf (__a);
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vminnmvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
+  return __builtin_aarch64_reduc_smin_scal_v2df (__a);
 }
 
 /* vmla */
@@ -20911,6 +19783,65 @@
   return -__a;
 }
 
+/* vpadd  */
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vpadd_s8 (int8x8_t __a, int8x8_t __b)
+{
+  return __builtin_aarch64_addpv8qi (__a, __b);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vpadd_s16 (int16x4_t __a, int16x4_t __b)
+{
+  return __builtin_aarch64_addpv4hi (__a, __b);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vpadd_s32 (int32x2_t __a, int32x2_t __b)
+{
+  return __builtin_aarch64_addpv2si (__a, __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+  return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
+						 (int8x8_t) __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+  return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
+						  (int16x4_t) __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+  return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
+						  (int32x2_t) __b);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vpaddd_f64 (float64x2_t __a)
+{
+  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vpaddd_s64 (int64x2_t __a)
+{
+  return __builtin_aarch64_addpdi (__a);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vpaddd_u64 (uint64x2_t __a)
+{
+  return __builtin_aarch64_addpdi ((int64x2_t) __a);
+}
+
 /* vqabs */
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -20937,6 +19868,12 @@
   return (int32_t) __builtin_aarch64_sqabssi (__a);
 }
 
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqabsd_s64 (int64_t __a)
+{
+  return __builtin_aarch64_sqabsdi (__a);
+}
+
 /* vqadd */
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
@@ -20966,25 +19903,26 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqaddb_u8 (uint8_t __a, uint8_t __b)
 {
-  return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b);
+  return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqaddh_u16 (uint16_t __a, uint16_t __b)
 {
-  return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b);
+  return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqadds_u32 (uint32_t __a, uint32_t __b)
 {
-  return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b);
+  return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
+  return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a,
+						     (uint64_t) __b);
 }
 
 /* vqdmlal */
@@ -21399,6 +20337,12 @@
   return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
 }
 
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
+{
+  return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c);
+}
+
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqdmulls_s32 (int32_t __a, int32_t __b)
 {
@@ -21405,12 +20349,18 @@
   return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
 }
 
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
 {
   return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
 }
 
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
+{
+  return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c);
+}
+
 /* vqmovn */
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -21549,6 +20499,12 @@
   return (int32_t) __builtin_aarch64_sqnegsi (__a);
 }
 
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqnegd_s64 (int64_t __a)
+{
+  return __builtin_aarch64_sqnegdi (__a);
+}
+
 /* vqrdmulh */
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
@@ -21628,25 +20584,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_uqrshldi_uus ( __a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21676,25 +20632,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
@@ -21724,25 +20680,25 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqrshlb_u8 (uint8_t __a, uint8_t __b)
 {
-  return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b);
+  return __builtin_aarch64_uqrshlqi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqrshlh_u16 (uint16_t __a, uint16_t __b)
 {
-  return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b);
+  return __builtin_aarch64_uqrshlhi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqrshls_u32 (uint32_t __a, uint32_t __b)
 {
-  return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b);
+  return __builtin_aarch64_uqrshlsi_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
+  return __builtin_aarch64_uqrshldi_uus (__a, __b);
 }
 
 /* vqrshrn */
@@ -21768,19 +20724,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
@@ -21804,19 +20760,19 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqrshrnh_n_u16 (uint16_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
+  return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqrshrns_n_u32 (uint32_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
+  return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
+  return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
 }
 
 /* vqrshrun */
@@ -21886,25 +20842,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_uqshldi_uus ( __a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -21934,25 +20890,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
@@ -21982,25 +20938,25 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqshlb_u8 (uint8_t __a, uint8_t __b)
 {
-  return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b);
+  return __builtin_aarch64_uqshlqi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqshlh_u16 (uint16_t __a, uint16_t __b)
 {
-  return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b);
+  return __builtin_aarch64_uqshlhi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqshls_u32 (uint32_t __a, uint32_t __b)
 {
-  return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b);
+  return __builtin_aarch64_uqshlsi_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
+  return __builtin_aarch64_uqshldi_uus (__a, __b);
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
@@ -22030,25 +20986,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqshl_n_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqshl_n_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqshl_n_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshl_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22078,25 +21034,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vqshlq_n_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vqshlq_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vqshlq_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vqshlq_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
@@ -22126,25 +21082,25 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqshlb_n_u8 (uint8_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b);
+  return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqshlh_n_u16 (uint16_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b);
+  return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqshls_n_u32 (uint32_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b);
+  return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshld_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
+  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
 }
 
 /* vqshlu */
@@ -22152,73 +21108,73 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqshlu_n_s8 (int8x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
+  return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqshlu_n_s16 (int16x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
+  return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqshlu_n_s32 (int32x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
+  return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqshlu_n_s64 (int64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
+  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vqshluq_n_s8 (int8x16_t __a, const int __b)
 {
-  return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
+  return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vqshluq_n_s16 (int16x8_t __a, const int __b)
 {
-  return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
+  return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vqshluq_n_s32 (int32x4_t __a, const int __b)
 {
-  return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
+  return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vqshluq_n_s64 (int64x2_t __a, const int __b)
 {
-  return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
+  return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vqshlub_n_s8 (int8_t __a, const int __b)
 {
-  return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
+  return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vqshluh_n_s16 (int16_t __a, const int __b)
 {
-  return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
+  return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vqshlus_n_s32 (int32_t __a, const int __b)
 {
-  return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
+  return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vqshlud_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
+  return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
 }
 
 /* vqshrn */
@@ -22244,19 +21200,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vqshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vqshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vqshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
@@ -22280,19 +21236,19 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqshrnh_n_u16 (uint16_t __a, const int __b)
 {
-  return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
+  return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqshrns_n_u32 (uint32_t __a, const int __b)
 {
-  return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
+  return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
+  return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
 }
 
 /* vqshrun */
@@ -22362,27 +21318,66 @@
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vqsubb_u8 (uint8_t __a, uint8_t __b)
 {
-  return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b);
+  return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vqsubh_u16 (uint16_t __a, uint16_t __b)
 {
-  return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b);
+  return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vqsubs_u32 (uint32_t __a, uint32_t __b)
 {
-  return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b);
+  return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
+  return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a,
+						     (uint64_t) __b);
 }
 
+/* vrbit  */
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrbit_p8 (poly8x8_t __a)
+{
+  return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrbit_s8 (int8x8_t __a)
+{
+  return __builtin_aarch64_rbitv8qi (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrbit_u8 (uint8x8_t __a)
+{
+  return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrbitq_p8 (poly8x16_t __a)
+{
+  return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrbitq_s8 (int8x16_t __a)
+{
+  return __builtin_aarch64_rbitv16qi (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrbitq_u8 (uint8x16_t __a)
+{
+  return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
+}
+
 /* vrecpe  */
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
@@ -22461,6 +21456,234 @@
   return __builtin_aarch64_frecpxdf (__a);
 }
 
+
+/* vrev  */
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev16_p8 (poly8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev16_s8 (int8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev16_u8 (uint8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev16q_p8 (poly8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev16q_s8 (int8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev16q_u8 (uint8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev32_p8 (poly8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vrev32_p16 (poly16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev32_s8 (int8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrev32_s16 (int16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev32_u8 (uint8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrev32_u16 (uint16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev32q_p8 (poly8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vrev32q_p16 (poly16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev32q_s8 (int8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrev32q_s16 (int16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev32q_u8 (uint8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrev32q_u16 (uint16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrev64_f32 (float32x2_t a)
+{
+  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev64_p8 (poly8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vrev64_p16 (poly16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev64_s8 (int8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrev64_s16 (int16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrev64_s32 (int32x2_t a)
+{
+  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev64_u8 (uint8x8_t a)
+{
+  return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrev64_u16 (uint16x4_t a)
+{
+  return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrev64_u32 (uint32x2_t a)
+{
+  return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrev64q_f32 (float32x4_t a)
+{
+  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev64q_p8 (poly8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vrev64q_p16 (poly16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev64q_s8 (int8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrev64q_s16 (int16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrev64q_s32 (int32x4_t a)
+{
+  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev64q_u8 (uint8x16_t a)
+{
+  return __builtin_shuffle (a,
+      (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrev64q_u16 (uint16x8_t a)
+{
+  return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrev64q_u32 (uint32x4_t a)
+{
+  return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
+}
+
 /* vrnd  */
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
@@ -22469,6 +21692,12 @@
   return __builtin_aarch64_btruncv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnd_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndq_f32 (float32x4_t __a)
 {
@@ -22489,6 +21718,12 @@
   return __builtin_aarch64_roundv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnda_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndaq_f32 (float32x4_t __a)
 {
@@ -22509,6 +21744,12 @@
   return __builtin_aarch64_nearbyintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndi_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndiq_f32 (float32x4_t __a)
 {
@@ -22529,6 +21770,12 @@
   return __builtin_aarch64_floorv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndm_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndmq_f32 (float32x4_t __a)
 {
@@ -22548,6 +21795,13 @@
 {
   return __builtin_aarch64_frintnv2sf (__a);
 }
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndn_f64 (float64x1_t __a)
+{
+  return __builtin_aarch64_frintndf (__a);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndnq_f32 (float32x4_t __a)
 {
@@ -22568,6 +21822,12 @@
   return __builtin_aarch64_ceilv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndp_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndpq_f32 (float32x4_t __a)
 {
@@ -22588,6 +21848,12 @@
   return __builtin_aarch64_rintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndx_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndxq_f32 (float32x4_t __a)
 {
@@ -22629,25 +21895,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_urshlv8qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_urshlv4hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_urshlv2si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_urshldi_uus (__a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22677,25 +21943,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_urshlv16qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_urshlv8hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_urshlv4si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_urshlv2di_uus (__a, __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -22707,7 +21973,7 @@
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
+  return __builtin_aarch64_urshldi_uus (__a, __b);
 }
 
 /* vrshr */
@@ -22739,25 +22005,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vrshr_n_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vrshr_n_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vrshr_n_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrshr_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22787,25 +22053,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vrshrq_n_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vrshrq_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vrshrq_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vrshrq_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -22817,7 +22083,7 @@
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrshrd_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
+  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
 }
 
 /* vrsra */
@@ -22849,29 +22115,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
 {
-  return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
-						    (int8x8_t) __b, __c);
+  return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
 {
-  return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
-						     (int16x4_t) __b, __c);
+  return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 {
-  return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
-						     (int32x2_t) __b, __c);
+  return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
-						   (int64x1_t) __b, __c);
+  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -22901,29 +22163,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
 {
-  return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
-						      (int8x16_t) __b, __c);
+  return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
 {
-  return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
-						     (int16x8_t) __b, __c);
+  return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
 {
-  return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
-						     (int32x4_t) __b, __c);
+  return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
 {
-  return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
-						     (int64x2_t) __b, __c);
+  return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -22935,7 +22193,7 @@
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
+  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
 }
 
 #ifdef __ARM_FEATURE_CRYPTO
@@ -23128,109 +22386,109 @@
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vshl_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
+  return __builtin_aarch64_sshlv8qi (__a, __b);
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vshl_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
+  return __builtin_aarch64_sshlv4hi (__a, __b);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vshl_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
+  return __builtin_aarch64_sshlv2si (__a, __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshl_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
+  return __builtin_aarch64_sshldi (__a, __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vshl_u8 (uint8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_ushlv8qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vshl_u16 (uint16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_ushlv4hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vshl_u32 (uint32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_ushlv2si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshl_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_ushldi_uus (__a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vshlq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
+  return __builtin_aarch64_sshlv16qi (__a, __b);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vshlq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
+  return __builtin_aarch64_sshlv8hi (__a, __b);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vshlq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
+  return __builtin_aarch64_sshlv4si (__a, __b);
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vshlq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
+  return __builtin_aarch64_sshlv2di (__a, __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
+  return __builtin_aarch64_ushlv16qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
+  return __builtin_aarch64_ushlv8hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
+  return __builtin_aarch64_ushlv4si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
+  return __builtin_aarch64_ushlv2di_uus (__a, __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshld_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
+  return __builtin_aarch64_sshldi (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
+  return __builtin_aarch64_ushldi_uus (__a, __b);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
@@ -23290,19 +22548,19 @@
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vshll_n_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
+  return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vshll_n_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
+  return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vshll_n_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
+  return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
 }
 
 /* vshr */
@@ -23444,29 +22702,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
 {
-  return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
-						   (int8x8_t) __b, __c);
+  return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
 {
-  return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
-						    (int16x4_t) __b, __c);
+  return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 {
-  return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
-						    (int32x2_t) __b, __c);
+  return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
-						  (int64x1_t) __b, __c);
+  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -23496,29 +22750,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
 {
-  return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
-						     (int8x16_t) __b, __c);
+  return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
 {
-  return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
-						    (int16x8_t) __b, __c);
+  return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
 {
-  return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
-						    (int32x4_t) __b, __c);
+  return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
 {
-  return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
-						    (int64x2_t) __b, __c);
+  return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -23530,7 +22780,7 @@
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
+  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
 }
 
 /* vsqadd */
@@ -23538,80 +22788,73 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
-						   (int8x8_t) __b);
+  return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
-						    (int16x4_t) __b);
+  return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
-						    (int32x2_t) __b);
+  return __builtin_aarch64_usqaddv2si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_usqadddi_uus (__a, __b);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
-						     (int8x16_t) __b);
+  return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
-						    (int16x8_t) __b);
+  return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
-						    (int32x4_t) __b);
+  return __builtin_aarch64_usqaddv4si_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
-						    (int64x2_t) __b);
+  return __builtin_aarch64_usqaddv2di_uus (__a, __b);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vsqaddb_u8 (uint8_t __a, int8_t __b)
 {
-  return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b);
+  return __builtin_aarch64_usqaddqi_uus (__a, __b);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vsqaddh_u16 (uint16_t __a, int16_t __b)
 {
-  return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b);
+  return __builtin_aarch64_usqaddhi_uus (__a, __b);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vsqadds_u32 (uint32_t __a, int32_t __b)
 {
-  return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b);
+  return __builtin_aarch64_usqaddsi_uus (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_usqadddi_uus (__a, __b);
 }
 
 /* vsqrt */
@@ -23662,29 +22905,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
 {
-  return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
-						   (int8x8_t) __b, __c);
+  return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
 {
-  return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
-						    (int16x4_t) __b, __c);
+  return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 {
-  return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
-						    (int32x2_t) __b, __c);
+  return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
-						  (int64x1_t) __b, __c);
+  return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -23714,29 +22953,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
 {
-  return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
-						     (int8x16_t) __b, __c);
+  return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
 {
-  return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
-						    (int16x8_t) __b, __c);
+  return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
 {
-  return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
-						    (int32x4_t) __b, __c);
+  return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
 {
-  return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
-						    (int64x2_t) __b, __c);
+  return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -23748,7 +22983,7 @@
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
+  return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
 }
 
 /* vsri */
@@ -23780,29 +23015,25 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
 {
-  return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
-						   (int8x8_t) __b, __c);
+  return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
 {
-  return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
-						    (int16x4_t) __b, __c);
+  return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
 {
-  return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
-						    (int32x2_t) __b, __c);
+  return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
-						  (int64x1_t) __b, __c);
+  return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -23832,29 +23063,25 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
 {
-  return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
-						     (int8x16_t) __b, __c);
+  return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
 {
-  return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
-						    (int16x8_t) __b, __c);
+  return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
 {
-  return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
-						    (int32x4_t) __b, __c);
+  return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
 {
-  return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
-						    (int64x2_t) __b, __c);
+  return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -23866,7 +23093,7 @@
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
 {
-  return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
+  return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
 }
 
 /* vst1 */
@@ -24970,6 +24197,438 @@
 
 /* vtrn */
 
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vtrn1_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtrn1_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vtrn1_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vtrn1_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vtrn2_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtrn2_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vtrn2_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vtrn2_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vtrn_f32 (float32x2_t a, float32x2_t b)
 {
@@ -25083,19 +24742,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vtst_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
+  return (uint8x8_t) ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vtst_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
+  return (uint16x4_t) ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vtst_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
+  return (uint32x2_t) ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -25107,22 +24766,19 @@
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
-						 (int8x8_t) __b);
+  return ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
-						  (int16x4_t) __b);
+  return ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
-						  (int32x2_t) __b);
+  return ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -25134,53 +24790,49 @@
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vtstq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
+  return (uint8x16_t) ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vtstq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
+  return (uint16x8_t) ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vtstq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
+  return (uint32x4_t) ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vtstq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
+  return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
-						   (int8x16_t) __b);
+  return ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
-						  (int16x8_t) __b);
+  return ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
-						  (int32x4_t) __b);
+  return ((__a & __b) != 0);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
-						  (int64x2_t) __b);
+  return ((__a & __b) != __AARCH64_UINT64_C (0));
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -25200,73 +24852,73 @@
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
 {
-  return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
+  return __builtin_aarch64_suqaddv8qi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
 {
-  return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
+  return __builtin_aarch64_suqaddv4hi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
 {
-  return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
+  return __builtin_aarch64_suqaddv2si_ssu (__a,  __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
+  return __builtin_aarch64_suqadddi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
 {
-  return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
+  return __builtin_aarch64_suqaddv16qi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
 {
-  return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
+  return __builtin_aarch64_suqaddv8hi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
 {
-  return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
+  return __builtin_aarch64_suqaddv4si_ssu (__a,  __b);
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
 {
-  return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
+  return __builtin_aarch64_suqaddv2di_ssu (__a,  __b);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vuqaddb_s8 (int8_t __a, uint8_t __b)
 {
-  return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b);
+  return __builtin_aarch64_suqaddqi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vuqaddh_s16 (int16_t __a, uint16_t __b)
 {
-  return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b);
+  return __builtin_aarch64_suqaddhi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vuqadds_s32 (int32_t __a, uint32_t __b)
 {
-  return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b);
+  return __builtin_aarch64_suqaddsi_ssu (__a,  __b);
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
 {
-  return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
+  return __builtin_aarch64_suqadddi_ssu (__a,  __b);
 }
 
 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) 		\
@@ -25300,10 +24952,880 @@
 
 /* vuzp */
 
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vuzp1_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vuzp1_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vuzp1_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vuzp1_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vuzp2_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vuzp2_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vuzp2_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vuzp2_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
+#else
+  return __builtin_shuffle (__a, __b,
+      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
 __INTERLEAVE_LIST (uzp)
 
 /* vzip */
 
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vzip1_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vzip1_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vzip1_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vzip1_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vzip1q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vzip1q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t)
+      {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vzip1q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vzip1q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t)
+      {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vzip1q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vzip1q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t)
+      {12, 4, 13, 5, 14, 6, 15, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
+#endif
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vzip2_f32 (float32x2_t __a, float32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vzip2_s8 (int8x8_t __a, int8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vzip2_s16 (int16x4_t __a, int16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vzip2_s32 (int32x2_t __a, int32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vzip2q_f32 (float32x4_t __a, float32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vzip2q_f64 (float64x2_t __a, float64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t)
+      {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vzip2q_s8 (int8x16_t __a, int8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vzip2q_s16 (int16x8_t __a, int16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t)
+      {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vzip2q_s32 (int32x4_t __a, int32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vzip2q_s64 (int64x2_t __a, int64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
+#else
+  return __builtin_shuffle (__a, __b, (uint8x16_t)
+      {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
+#else
+  return __builtin_shuffle (__a, __b, (uint16x8_t)
+      {4, 12, 5, 13, 6, 14, 7, 15});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
+#else
+  return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+#ifdef __AARCH64EB__
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
+#else
+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
+#endif
+}
+
 __INTERLEAVE_LIST (zip)
 
 #undef __INTERLEAVE_LIST
--- a/src/gcc/config/aarch64/t-aarch64-linux
+++ b/src/gcc/config/aarch64/t-aarch64-linux
@@ -22,10 +22,7 @@
 LIB1ASMFUNCS = _aarch64_sync_cache_range
 
 AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be)
-MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu)
+MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu)
 MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu)
 
-# Disable the multilib for linux-gnu targets for the time being; focus
-# on the baremetal targets.
-MULTILIB_OPTIONS    =
-MULTILIB_DIRNAMES   =
+MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32
--- a/src/gcc/config/aarch64/aarch64.md
+++ b/src/gcc/config/aarch64/aarch64.md
@@ -67,7 +67,14 @@
 
 (define_c_enum "unspec" [
     UNSPEC_CASESI
-    UNSPEC_CLS
+    UNSPEC_CRC32B
+    UNSPEC_CRC32CB
+    UNSPEC_CRC32CH
+    UNSPEC_CRC32CW
+    UNSPEC_CRC32CX
+    UNSPEC_CRC32H
+    UNSPEC_CRC32W
+    UNSPEC_CRC32X
     UNSPEC_FRECPE
     UNSPEC_FRECPS
     UNSPEC_FRECPX
@@ -83,8 +90,14 @@
     UNSPEC_GOTTINYPIC
     UNSPEC_LD1
     UNSPEC_LD2
+    UNSPEC_LD2_DUP
     UNSPEC_LD3
+    UNSPEC_LD3_DUP
     UNSPEC_LD4
+    UNSPEC_LD4_DUP
+    UNSPEC_LD2_LANE
+    UNSPEC_LD3_LANE
+    UNSPEC_LD4_LANE
     UNSPEC_MB
     UNSPEC_NOP
     UNSPEC_PRLG_STK
@@ -93,20 +106,27 @@
     UNSPEC_SISD_SSHL
     UNSPEC_SISD_USHL
     UNSPEC_SSHL_2S
-    UNSPEC_SSHR64
     UNSPEC_ST1
     UNSPEC_ST2
     UNSPEC_ST3
     UNSPEC_ST4
+    UNSPEC_ST2_LANE
+    UNSPEC_ST3_LANE
+    UNSPEC_ST4_LANE
     UNSPEC_TLS
     UNSPEC_TLSDESC
     UNSPEC_USHL_2S
-    UNSPEC_USHR64
     UNSPEC_VSTRUCTDUMMY
+    UNSPEC_SP_SET
+    UNSPEC_SP_TEST
 ])
 
 (define_c_enum "unspecv" [
     UNSPECV_EH_RETURN		; Represent EH_RETURN
+    UNSPECV_GET_FPCR		; Represent fetch of FPCR content.
+    UNSPECV_SET_FPCR		; Represent assign of FPCR content.
+    UNSPECV_GET_FPSR		; Represent fetch of FPSR content.
+    UNSPECV_SET_FPSR		; Represent assign of FPSR content.
   ]
 )
 
@@ -155,17 +175,11 @@
 ;; Processor types.
 (include "aarch64-tune.md")
 
-;; True if the generic scheduling description should be used.
-
-(define_attr "generic_sched" "yes,no"
-  (const (if_then_else
-          (eq_attr "tune" "cortexa53,cortexa15")
-          (const_string "no")
-          (const_string "yes"))))
-
 ;; Scheduling
 (include "../arm/cortex-a53.md")
-(include "../arm/cortex-a15.md")
+(include "../arm/cortex-a57.md")
+(include "thunderx.md")
+(include "../arm/xgene1.md")
 
 ;; -------------------------------------------------------------------
 ;; Jumps and other miscellaneous insns
@@ -514,6 +528,10 @@
 	      (use (match_operand 2 "" ""))])]
   ""
   {
+    if (!REG_P (XEXP (operands[0], 0))
+       && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
+     XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0));
+
     if (operands[2] == NULL_RTX)
       operands[2] = const0_rtx;
   }
@@ -527,6 +545,10 @@
 	      (use (match_operand 3 "" ""))])]
   ""
   {
+    if (!REG_P (XEXP (operands[1], 0))
+       && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF))
+     XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0));
+
     if (operands[3] == NULL_RTX)
       operands[3] = const0_rtx;
   }
@@ -533,25 +555,29 @@
 )
 
 (define_insn "*sibcall_insn"
-  [(call (mem:DI (match_operand:DI 0 "" "X"))
+  [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf"))
 	 (match_operand 1 "" ""))
    (return)
    (use (match_operand 2 "" ""))]
-  "GET_CODE (operands[0]) == SYMBOL_REF"
-  "b\\t%a0"
-  [(set_attr "type" "branch")]
-
+  "SIBLING_CALL_P (insn)"
+  "@
+   br\\t%0
+   b\\t%a0"
+  [(set_attr "type" "branch, branch")]
 )
 
 (define_insn "*sibcall_value_insn"
   [(set (match_operand 0 "" "")
-	(call (mem:DI (match_operand 1 "" "X"))
+	(call (mem:DI
+		(match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf"))
 	      (match_operand 2 "" "")))
    (return)
    (use (match_operand 3 "" ""))]
-  "GET_CODE (operands[1]) == SYMBOL_REF"
-  "b\\t%a1"
-  [(set_attr "type" "branch")]
+  "SIBLING_CALL_P (insn)"
+  "@
+   br\\t%1
+   b\\t%a1"
+  [(set_attr "type" "branch, branch")]
 )
 
 ;; Call subroutine returning any type.
@@ -641,17 +667,20 @@
     if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
       operands[1] = force_reg (<MODE>mode, operands[1]);
 
-    if (CONSTANT_P (operands[1]))
-      {
-	aarch64_expand_mov_immediate (operands[0], operands[1]);
-	DONE;
-      }
+    /* FIXME: RR we still need to fix up what we are doing with
+       symbol_refs and other types of constants.  */
+    if (CONSTANT_P (operands[1])
+        && !CONST_INT_P (operands[1]))
+     {
+       aarch64_expand_mov_immediate (operands[0], operands[1]);
+       DONE;
+     }
   "
 )
 
-(define_insn "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m,  m,r,r  ,*w, r,*w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
+(define_insn_and_split "*movsi_aarch64"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r  ,*w, r,*w")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -659,6 +688,7 @@
    mov\\t%w0, %w1
    mov\\t%w0, %w1
    mov\\t%w0, %1
+   #
    ldr\\t%w0, %1
    ldr\\t%s0, %1
    str\\t%w1, %0
@@ -668,14 +698,21 @@
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
-                     adr,adr,fmov,fmov,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
+    && GP_REGNUM_P (REGNO (operands[0]))"
+   [(const_int 0)]
+   "{
+       aarch64_expand_mov_immediate (operands[0], operands[1]);
+       DONE;
+    }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+                     adr,adr,f_mcr,f_mrc,fmov")
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
 )
 
-(define_insn "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m,  m,r,r,  *w, r,*w,w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+(define_insn_and_split "*movdi_aarch64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m,  m,r,r,  *w, r,*w,w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -683,6 +720,7 @@
    mov\\t%0, %x1
    mov\\t%x0, %1
    mov\\t%x0, %1
+   #
    ldr\\t%x0, %1
    ldr\\t%d0, %1
    str\\t%x1, %0
@@ -693,10 +731,17 @@
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    movi\\t%d0, %1"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
-                     adr,adr,fmov,fmov,fmov,fmov")
-   (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
+    && GP_REGNUM_P (REGNO (operands[0]))"
+   [(const_int 0)]
+   "{
+       aarch64_expand_mov_immediate (operands[0], operands[1]);
+       DONE;
+    }"
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
+                     adr,adr,f_mcr,f_mrc,fmov,fmov")
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm<mode>"
@@ -789,7 +834,7 @@
    str\\t%w1, %0
    mov\\t%w0, %w1"
   [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
-                     f_loads,f_stores,f_loads,f_stores,fmov")]
+                     f_loads,f_stores,f_loads,f_stores,mov_reg")]
 )
 
 (define_insn "*movdf_aarch64"
@@ -863,6 +908,24 @@
   }
 )
 
+;; 0 is dst
+;; 1 is src
+;; 2 is size of move in bytes
+;; 3 is alignment
+
+(define_expand "movmemdi"
+  [(match_operand:BLK 0 "memory_operand")
+   (match_operand:BLK 1 "memory_operand")
+   (match_operand:DI 2 "immediate_operand")
+   (match_operand:DI 3 "immediate_operand")]
+   "!STRICT_ALIGNMENT"
+{
+  if (aarch64_expand_movmem (operands))
+    DONE;
+  FAIL;
+}
+)
+
 ;; Operands 1 and 3 are tied together by the final condition; so we allow
 ;; fairly lax checking on the second memory operation.
 (define_insn "load_pair<mode>"
@@ -923,31 +986,45 @@
   [(set_attr "type" "neon_store1_2reg<q>")]
 )
 
-;; Load pair with writeback.  This is primarily used in function epilogues
-;; when restoring [fp,lr]
+;; Load pair with post-index writeback.  This is primarily used in function
+;; epilogues.
 (define_insn "loadwb_pair<GPI:mode>_<P:mode>"
   [(parallel
     [(set (match_operand:P 0 "register_operand" "=k")
           (plus:P (match_operand:P 1 "register_operand" "0")
-                  (match_operand:P 4 "const_int_operand" "n")))
+                  (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
      (set (match_operand:GPI 2 "register_operand" "=r")
-          (mem:GPI (plus:P (match_dup 1)
-                   (match_dup 4))))
+          (mem:GPI (match_dup 1)))
      (set (match_operand:GPI 3 "register_operand" "=r")
           (mem:GPI (plus:P (match_dup 1)
                    (match_operand:P 5 "const_int_operand" "n"))))])]
-  "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
+  "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
   [(set_attr "type" "load2")]
 )
 
-;; Store pair with writeback.  This is primarily used in function prologues
-;; when saving [fp,lr]
+(define_insn "loadwb_pair<GPF:mode>_<P:mode>"
+  [(parallel
+    [(set (match_operand:P 0 "register_operand" "=k")
+          (plus:P (match_operand:P 1 "register_operand" "0")
+                  (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
+     (set (match_operand:GPF 2 "register_operand" "=w")
+          (mem:GPF (match_dup 1)))
+     (set (match_operand:GPF 3 "register_operand" "=w")
+          (mem:GPF (plus:P (match_dup 1)
+                   (match_operand:P 5 "const_int_operand" "n"))))])]
+  "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)"
+  "ldp\\t%<w>2, %<w>3, [%1], %4"
+  [(set_attr "type" "neon_load1_2reg")]
+)
+
+;; Store pair with pre-index writeback.  This is primarily used in function
+;; prologues.
 (define_insn "storewb_pair<GPI:mode>_<P:mode>"
   [(parallel
     [(set (match_operand:P 0 "register_operand" "=&k")
           (plus:P (match_operand:P 1 "register_operand" "0")
-                  (match_operand:P 4 "const_int_operand" "n")))
+                  (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
      (set (mem:GPI (plus:P (match_dup 0)
                    (match_dup 4)))
           (match_operand:GPI 2 "register_operand" "r"))
@@ -959,6 +1036,22 @@
   [(set_attr "type" "store2")]
 )
 
+(define_insn "storewb_pair<GPF:mode>_<P:mode>"
+  [(parallel
+    [(set (match_operand:P 0 "register_operand" "=&k")
+          (plus:P (match_operand:P 1 "register_operand" "0")
+                  (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
+     (set (mem:GPF (plus:P (match_dup 0)
+                   (match_dup 4)))
+          (match_operand:GPF 2 "register_operand" "w"))
+     (set (mem:GPF (plus:P (match_dup 0)
+                   (match_operand:P 5 "const_int_operand" "n")))
+          (match_operand:GPF 3 "register_operand" "w"))])]
+  "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)"
+  "stp\\t%<w>2, %<w>3, [%0, %4]!"
+  [(set_attr "type" "neon_store1_2reg<q>")]
+)
+
 ;; -------------------------------------------------------------------
 ;; Sign/Zero extension
 ;; -------------------------------------------------------------------
@@ -1063,16 +1156,18 @@
 
 (define_insn "*addsi3_aarch64"
   [(set
-    (match_operand:SI 0 "register_operand" "=rk,rk,rk")
+    (match_operand:SI 0 "register_operand" "=rk,rk,w,rk")
     (plus:SI
-     (match_operand:SI 1 "register_operand" "%rk,rk,rk")
-     (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))]
+     (match_operand:SI 1 "register_operand" "%rk,rk,w,rk")
+     (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))]
   ""
   "@
   add\\t%w0, %w1, %2
   add\\t%w0, %w1, %w2
+  add\\t%0.2s, %1.2s, %2.2s
   sub\\t%w0, %w1, #%n2"
-  [(set_attr "type" "alu_imm,alu_reg,alu_imm")]
+  [(set_attr "type" "alu_imm,alu_reg,neon_add,alu_imm")
+   (set_attr "simd" "*,*,yes,*")]
 )
 
 ;; zero_extend version of above
@@ -1092,10 +1187,10 @@
 
 (define_insn "*adddi3_aarch64"
   [(set
-    (match_operand:DI 0 "register_operand" "=rk,rk,rk,!w")
+    (match_operand:DI 0 "register_operand" "=rk,rk,rk,w")
     (plus:DI
-     (match_operand:DI 1 "register_operand" "%rk,rk,rk,!w")
-     (match_operand:DI 2 "aarch64_plus_operand" "I,r,J,!w")))]
+     (match_operand:DI 1 "register_operand" "%rk,rk,rk,w")
+     (match_operand:DI 2 "aarch64_plus_operand" "I,r,J,w")))]
   ""
   "@
   add\\t%x0, %x1, %2
@@ -1106,7 +1201,26 @@
    (set_attr "simd" "*,*,*,yes")]
 )
 
-(define_insn "*add<mode>3_compare0"
+(define_expand "addti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+	(plus:TI (match_operand:TI 1 "register_operand" "")
+		 (match_operand:TI 2 "register_operand" "")))]
+  ""
+{
+  rtx low = gen_reg_rtx (DImode);
+  emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+				  gen_lowpart (DImode, operands[2])));
+
+  rtx high = gen_reg_rtx (DImode);
+  emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]),
+				 gen_highpart (DImode, operands[2])));
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
+  emit_move_insn (gen_highpart (DImode, operands[0]), high);
+  DONE;
+})
+
+(define_insn "add<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
 	 (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r")
@@ -1390,7 +1504,7 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "*add<mode>3_carryin"
+(define_insn "add<mode>3_carryin"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
     (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
@@ -1547,9 +1661,9 @@
 )
 
 (define_insn "subdi3"
-  [(set (match_operand:DI 0 "register_operand" "=rk,!w")
-	(minus:DI (match_operand:DI 1 "register_operand" "r,!w")
-		   (match_operand:DI 2 "register_operand" "r,!w")))]
+  [(set (match_operand:DI 0 "register_operand" "=rk,w")
+	(minus:DI (match_operand:DI 1 "register_operand" "r,w")
+		   (match_operand:DI 2 "register_operand" "r,w")))]
   ""
   "@
    sub\\t%x0, %x1, %x2
@@ -1558,8 +1672,26 @@
    (set_attr "simd" "*,yes")]
 )
 
+(define_expand "subti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+	(minus:TI (match_operand:TI 1 "register_operand" "")
+		  (match_operand:TI 2 "register_operand" "")))]
+  ""
+{
+  rtx low = gen_reg_rtx (DImode);
+  emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+				  gen_lowpart (DImode, operands[2])));
 
-(define_insn "*sub<mode>3_compare0"
+  rtx high = gen_reg_rtx (DImode);
+  emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]),
+				 gen_highpart (DImode, operands[2])));
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
+  emit_move_insn (gen_highpart (DImode, operands[0]), high);
+  DONE;
+})
+
+(define_insn "sub<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
 				  (match_operand:GPI 2 "register_operand" "r"))
@@ -1706,7 +1838,7 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "*sub<mode>3_carryin"
+(define_insn "sub<mode>3_carryin"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
     (minus:GPI (minus:GPI
@@ -1765,9 +1897,8 @@
 )
 
 (define_insn_and_split "absdi2"
-  [(set (match_operand:DI 0 "register_operand" "=r,w")
-	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))
-   (clobber (match_scratch:DI 2 "=&r,X"))]
+  [(set (match_operand:DI 0 "register_operand" "=&r,w")
+	(abs:DI (match_operand:DI 1 "register_operand" "r,w")))]
   ""
   "@
    #
@@ -1777,7 +1908,7 @@
    && GP_REGNUM_P (REGNO (operands[1]))"
   [(const_int 0)]
   {
-    emit_insn (gen_rtx_SET (VOIDmode, operands[2],
+    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
 			    gen_rtx_XOR (DImode,
 					 gen_rtx_ASHIFTRT (DImode,
 							   operands[1],
@@ -1786,7 +1917,7 @@
     emit_insn (gen_rtx_SET (VOIDmode,
 			    operands[0],
 			    gen_rtx_MINUS (DImode,
-					   operands[2],
+					   operands[0],
 					   gen_rtx_ASHIFTRT (DImode,
 							     operands[1],
 							     GEN_INT (63)))));
@@ -1935,7 +2066,7 @@
   [(set_attr "type" "mul")]
 )
 
-(define_insn "*madd<mode>"
+(define_insn "madd<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
 			    (match_operand:GPI 2 "register_operand" "r"))
@@ -2045,6 +2176,48 @@
   [(set_attr "type" "<su>mull")]
 )
 
+(define_expand "<su_optab>mulditi3"
+  [(set (match_operand:TI 0 "register_operand")
+	(mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand"))
+		 (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))]
+  ""
+{
+  rtx low = gen_reg_rtx (DImode);
+  emit_insn (gen_muldi3 (low, operands[1], operands[2]));
+
+  rtx high = gen_reg_rtx (DImode);
+  emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
+  emit_move_insn (gen_highpart (DImode, operands[0]), high);
+  DONE;
+})
+
+;; The default expansion of multi3 using umuldi3_highpart will perform
+;; the additions in an order that fails to combine into two madd insns.
+(define_expand "multi3"
+  [(set (match_operand:TI 0 "register_operand")
+	(mult:TI (match_operand:TI 1 "register_operand")
+		 (match_operand:TI 2 "register_operand")))]
+  ""
+{
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = gen_lowpart (DImode, operands[1]);
+  rtx l2 = gen_lowpart (DImode, operands[2]);
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = gen_highpart (DImode, operands[1]);
+  rtx h2 = gen_highpart (DImode, operands[2]);
+
+  emit_insn (gen_muldi3 (l0, l1, l2));
+  emit_insn (gen_umuldi3_highpart (h0, l1, l2));
+  emit_insn (gen_madddi (h0, h1, l2, h0));
+  emit_insn (gen_madddi (h0, l1, h2, h0));
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+  DONE;
+})
+
 (define_insn "<su>muldi3_highpart"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(truncate:DI
@@ -2345,11 +2518,46 @@
   }
 )
 
+(define_expand "mov<mode>cc"
+  [(set (match_operand:GPF 0 "register_operand" "")
+	(if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "")
+			  (match_operand:GPF 2 "register_operand" "")
+			  (match_operand:GPF 3 "register_operand" "")))]
+  ""
+  {
+    rtx ccreg;
+    enum rtx_code code = GET_CODE (operands[1]);
+
+    if (code == UNEQ || code == LTGT)
+      FAIL;
+
+    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+				  XEXP (operands[1], 1));
+    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+  }
+)
+
+
+;; CRC32 instructions.
+(define_insn "aarch64_<crc_variant>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+                    (match_operand:<crc_mode> 2 "register_operand" "r")]
+         CRC))]
+  "TARGET_CRC32"
+  {
+    if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64)
+      return "<crc_variant>\\t%w0, %w1, %x2";
+    else
+      return "<crc_variant>\\t%w0, %w1, %w2";
+  }
+  [(set_attr "type" "crc")]
+)
+
 (define_insn "*csinc2<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-        (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator"
-		  [(match_operand:CC 3 "cc_register" "") (const_int 0)])
-		 (match_operand:GPI 1 "register_operand" "r")))]
+        (plus:GPI (match_operand 2 "aarch64_comparison_operation" "")
+                  (match_operand:GPI 1 "register_operand" "r")))]
   ""
   "csinc\\t%<w>0, %<w>1, %<w>1, %M2"
   [(set_attr "type" "csel")]
@@ -2358,13 +2566,12 @@
 (define_insn "csinc3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
-	  (match_operator:GPI 1 "aarch64_comparison_operator"
-	   [(match_operand:CC 2 "cc_register" "") (const_int 0)])
-	  (plus:GPI (match_operand:GPI 3 "register_operand" "r")
+	  (match_operand 1 "aarch64_comparison_operation" "")
+	  (plus:GPI (match_operand:GPI 2 "register_operand" "r")
 		    (const_int 1))
-	  (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
+	  (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))]
   ""
-  "csinc\\t%<w>0, %<w>4, %<w>3, %M1"
+  "csinc\\t%<w>0, %<w>3, %<w>2, %M1"
   [(set_attr "type" "csel")]
 )
 
@@ -2371,12 +2578,11 @@
 (define_insn "*csinv3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
-	  (match_operator:GPI 1 "aarch64_comparison_operator"
-	   [(match_operand:CC 2 "cc_register" "") (const_int 0)])
-	  (not:GPI (match_operand:GPI 3 "register_operand" "r"))
-	  (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
+	  (match_operand 1 "aarch64_comparison_operation" "")
+	  (not:GPI (match_operand:GPI 2 "register_operand" "r"))
+	  (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))]
   ""
-  "csinv\\t%<w>0, %<w>4, %<w>3, %M1"
+  "csinv\\t%<w>0, %<w>3, %<w>2, %M1"
   [(set_attr "type" "csel")]
 )
 
@@ -2383,12 +2589,11 @@
 (define_insn "*csneg3<mode>_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
         (if_then_else:GPI
-	  (match_operator:GPI 1 "aarch64_comparison_operator"
-	   [(match_operand:CC 2 "cc_register" "") (const_int 0)])
-	  (neg:GPI (match_operand:GPI 3 "register_operand" "r"))
-	  (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
+	  (match_operand 1 "aarch64_comparison_operation" "")
+	  (neg:GPI (match_operand:GPI 2 "register_operand" "r"))
+	  (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))]
   ""
-  "csneg\\t%<w>0, %<w>4, %<w>3, %M1"
+  "csneg\\t%<w>0, %<w>3, %<w>2, %M1"
   [(set_attr "type" "csel")]
 )
 
@@ -2397,12 +2602,16 @@
 ;; -------------------------------------------------------------------
 
 (define_insn "<optab><mode>3"
-  [(set (match_operand:GPI 0 "register_operand" "=r,rk")
-	(LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r")
-		     (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>")))]
+  [(set (match_operand:GPI 0 "register_operand" "=r,rk,w")
+	(LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r,w")
+		     (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>,w")))]
   ""
-  "<logical>\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "logic_reg,logic_imm")]
+  "@
+  <logical>\\t%<w>0, %<w>1, %<w>2
+  <logical>\\t%<w>0, %<w>1, %<w>2
+  <logical>\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  [(set_attr "type" "logic_reg,logic_imm,neon_logic")
+   (set_attr "simd" "*,*,yes")]
 )
 
 ;; zero_extend version of above
@@ -2486,7 +2695,18 @@
   [(set_attr "type" "logic_shift_imm")]
 )
 
-;; zero_extend version of above
+(define_insn "*<optab>_rol<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(LOGICAL:GPI (rotate:GPI
+		      (match_operand:GPI 1 "register_operand" "r")
+		      (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
+		     (match_operand:GPI 3 "register_operand" "r")))]
+  ""
+  "<logical>\\t%<w>0, %<w>3, %<w>1, ror (<sizen> - %2)"
+  [(set_attr "type" "logic_shift_imm")]
+)
+
+;; zero_extend versions of above
 (define_insn "*<LOGICAL:optab>_<SHIFT:optab>si3_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
@@ -2499,12 +2719,27 @@
   [(set_attr "type" "logic_shift_imm")]
 )
 
+(define_insn "*<optab>_rolsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	 (LOGICAL:SI (rotate:SI
+		      (match_operand:SI 1 "register_operand" "r")
+		      (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+		     (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "<logical>\\t%w0, %w3, %w1, ror (32 - %2)"
+  [(set_attr "type" "logic_shift_imm")]
+)
+
 (define_insn "one_cmpl<mode>2"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-	(not:GPI (match_operand:GPI 1 "register_operand" "r")))]
+  [(set (match_operand:GPI 0 "register_operand" "=r,w")
+	(not:GPI (match_operand:GPI 1 "register_operand" "r,w")))]
   ""
-  "mvn\\t%<w>0, %<w>1"
-  [(set_attr "type" "logic_reg")]
+  "@
+  mvn\\t%<w>0, %<w>1
+  mvn\\t%0.8b, %1.8b"
+  [(set_attr "type" "logic_reg,neon_logic")
+   (set_attr "simd" "*,yes")]
 )
 
 (define_insn "*one_cmpl_<optab><mode>2"
@@ -2516,16 +2751,38 @@
   [(set_attr "type" "logic_shift_imm")]
 )
 
-(define_insn "*<LOGICAL:optab>_one_cmpl<mode>3"
-  [(set (match_operand:GPI 0 "register_operand" "=r")
-	(LOGICAL:GPI (not:GPI
-		      (match_operand:GPI 1 "register_operand" "r"))
-		     (match_operand:GPI 2 "register_operand" "r")))]
+;; Binary logical operators negating one operand, i.e. (a & !b), (a | !b).
+
+(define_insn "*<NLOGICAL:optab>_one_cmpl<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=r,w")
+	(NLOGICAL:GPI (not:GPI (match_operand:GPI 1 "register_operand" "r,w"))
+		     (match_operand:GPI 2 "register_operand" "r,w")))]
   ""
-  "<LOGICAL:nlogical>\\t%<w>0, %<w>2, %<w>1"
-  [(set_attr "type" "logic_reg")]
+  "@
+  <NLOGICAL:nlogical>\\t%<w>0, %<w>2, %<w>1
+  <NLOGICAL:nlogical>\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
+  [(set_attr "type" "logic_reg,neon_logic")
+   (set_attr "simd" "*,yes")]
 )
 
+;; (xor (not a) b) is simplify_rtx-ed down to (not (xor a b)).
+;; eon does not operate on SIMD registers so the vector variant must be split.
+(define_insn_and_split "*xor_one_cmpl<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=r,w")
+        (not:GPI (xor:GPI (match_operand:GPI 1 "register_operand" "r,?w")
+                          (match_operand:GPI 2 "register_operand" "r,w"))))]
+  ""
+  "eon\\t%<w>0, %<w>1, %<w>2" ;; For GPR registers (only).
+  "reload_completed && (which_alternative == 1)" ;; For SIMD registers.
+  [(set (match_operand:GPI 0 "register_operand" "=w")
+        (xor:GPI (match_operand:GPI 1 "register_operand" "w")
+                 (match_operand:GPI 2 "register_operand" "w")))
+   (set (match_dup 0) (not:GPI (match_dup 0)))]
+  ""
+  [(set_attr "type" "logic_reg,multiple")
+   (set_attr "simd" "*,yes")]
+)
+
 (define_insn "*and_one_cmpl<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -2555,6 +2812,18 @@
   [(set_attr "type" "logics_reg")]
 )
 
+(define_insn "*and_one_cmpl<mode>3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+    (compare:CC_NZ
+     (and:GPI (not:GPI
+           (match_operand:GPI 0 "register_operand" "r"))
+          (match_operand:GPI 1 "register_operand" "r"))
+     (const_int 0)))]
+  ""
+  "bics\\t<w>zr, %<w>1, %<w>0"
+  [(set_attr "type" "logics_reg")]
+)
+
 (define_insn "*<LOGICAL:optab>_one_cmpl_<SHIFT:optab><mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(LOGICAL:GPI (not:GPI
@@ -2604,6 +2873,20 @@
   [(set_attr "type" "logics_shift_imm")]
 )
 
+(define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+    (compare:CC_NZ
+     (and:GPI (not:GPI
+           (SHIFT:GPI
+            (match_operand:GPI 0 "register_operand" "r")
+            (match_operand:QI 1 "aarch64_shift_imm_<mode>" "n")))
+          (match_operand:GPI 2 "register_operand" "r"))
+     (const_int 0)))]
+  ""
+  "bics\\t<w>zr, %<w>2, %<w>0, <SHIFT:shift> %1"
+  [(set_attr "type" "logics_shift_imm")]
+)
+
 (define_insn "clz<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
@@ -2622,7 +2905,7 @@
 
     emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
-    emit_insn (gen_csinc3<mode>_insn (operands[0], x, ccreg, operands[0], const0_rtx));
+    emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx));
     DONE;
   }
 )
@@ -2629,7 +2912,7 @@
 
 (define_insn "clrsb<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_CLS))]
+        (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
   ""
   "cls\\t%<w>0, %<w>1"
   [(set_attr "type" "clz")]
@@ -3122,7 +3405,7 @@
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
 			  (match_operand 1 "const_int_operand" "n")
 			  (const_int 0))
-	(zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r")
+	(zero_extract:GPI (match_operand:GPI 2 "register_operand" "r")
 			  (match_dup 1)
 			  (match_operand 3 "const_int_operand" "n")))]
   "!(UINTVAL (operands[1]) == 0
@@ -3177,6 +3460,38 @@
   [(set_attr "type" "rev")]
 )
 
+;; There are no canonicalisation rules for the position of the lshiftrt, ashift
+;; operations within an IOR/AND RTX, therefore we have two patterns matching
+;; each valid permutation.
+
+(define_insn "rev16<mode>2"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
+                                      (const_int 8))
+                          (match_operand:GPI 3 "const_int_operand" "n"))
+                 (and:GPI (lshiftrt:GPI (match_dup 1)
+                                        (const_int 8))
+                          (match_operand:GPI 2 "const_int_operand" "n"))))]
+  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+  "rev16\\t%<w>0, %<w>1"
+  [(set_attr "type" "rev")]
+)
+
+(define_insn "rev16<mode>2_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
+                                        (const_int 8))
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (ashift:GPI (match_dup 1)
+                                      (const_int 8))
+                          (match_operand:GPI 3 "const_int_operand" "n"))))]
+  "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
+  "rev16\\t%<w>0, %<w>1"
+  [(set_attr "type" "rev")]
+)
+
 ;; zero_extend version of above
 (define_insn "*bswapsi2_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3191,7 +3506,7 @@
 ;; -------------------------------------------------------------------
 
 ;; frint floating-point round to integral standard patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 
 (define_insn "<frint_pattern><mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
@@ -3302,20 +3617,24 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
-(define_insn "float<GPI:mode><GPF:mode>2"
-  [(set (match_operand:GPF 0 "register_operand" "=w")
-        (float:GPF (match_operand:GPI 1 "register_operand" "r")))]
-  "TARGET_FLOAT"
-  "scvtf\\t%<GPF:s>0, %<GPI:w>1"
-  [(set_attr "type" "f_cvti2f")]
+(define_insn "<optab><fcvt_target><GPF:mode>2"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w")
+        (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,r")))]
+  ""
+  "@
+   <su_optab>cvtf\t%<GPF:s>0, %<s>1
+   <su_optab>cvtf\t%<GPF:s>0, %<w1>1"
+  [(set_attr "simd" "yes,no")
+   (set_attr "fp" "no,yes")
+   (set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")]
 )
 
-(define_insn "floatuns<GPI:mode><GPF:mode>2"
+(define_insn "<optab><fcvt_iesize><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-        (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))]
+        (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))]
   "TARGET_FLOAT"
-  "ucvtf\\t%<GPF:s>0, %<GPI:w>1"
-  [(set_attr "type" "f_cvt")]
+  "<su_optab>cvtf\t%<GPF:s>0, %<w2>1"
+  [(set_attr "type" "f_cvti2f")]
 )
 
 ;; -------------------------------------------------------------------
@@ -3497,7 +3816,7 @@
 	  (truncate:DI (match_operand:TI 1 "register_operand" "w"))))]
   "reload_completed || reload_in_progress"
   "fmov\\t%d0, %d1"
-  [(set_attr "type" "f_mcr")
+  [(set_attr "type" "fmov")
    (set_attr "length" "4")
   ])
 
@@ -3595,36 +3914,63 @@
   [(set_attr "type" "call")
    (set_attr "length" "16")])
 
-(define_insn "tlsie_small"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-        (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")]
+(define_insn "tlsie_small_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=r")
+        (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")]
 		   UNSPEC_GOTSMALLTLS))]
   ""
-  "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]"
+  "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]"
   [(set_attr "type" "load1")
    (set_attr "length" "8")]
 )
 
-(define_insn "tlsle_small"
+(define_insn "tlsie_small_sidi"
   [(set (match_operand:DI 0 "register_operand" "=r")
-        (unspec:DI [(match_operand:DI 1 "register_operand" "r")
-                   (match_operand:DI 2 "aarch64_tls_le_symref" "S")]
+	(zero_extend:DI
+          (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")]
+		      UNSPEC_GOTSMALLTLS)))]
+  ""
+  "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]"
+  [(set_attr "type" "load1")
+   (set_attr "length" "8")]
+)
+
+(define_expand "tlsle_small"
+  [(set (match_operand 0 "register_operand" "=r")
+        (unspec [(match_operand 1 "register_operand" "r")
+                   (match_operand 2 "aarch64_tls_le_symref" "S")]
+                   UNSPEC_GOTSMALLTLS))]
+  ""
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+  emit_insn ((mode == DImode
+	      ? gen_tlsle_small_di
+	      : gen_tlsle_small_si) (operands[0],
+				     operands[1],
+				     operands[2]));
+  DONE;
+})
+
+(define_insn "tlsle_small_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec:P [(match_operand:P 1 "register_operand" "r")
+                   (match_operand 2 "aarch64_tls_le_symref" "S")]
 		   UNSPEC_GOTSMALLTLS))]
   ""
-  "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2"
+  "add\\t%<w>0, %<w>1, #%G2\;add\\t%<w>0, %<w>0, #%L2"
   [(set_attr "type" "alu_reg")
    (set_attr "length" "8")]
 )
 
-(define_insn "tlsdesc_small"
-  [(set (reg:DI R0_REGNUM)
-        (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")]
+(define_insn "tlsdesc_small_<mode>"
+  [(set (reg:PTR R0_REGNUM)
+        (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
 		   UNSPEC_TLSDESC))
    (clobber (reg:DI LR_REGNUM))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:DI 1 "=r"))]
   "TARGET_TLS_DESC"
-  "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
+  "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
   [(set_attr "type" "call")
    (set_attr "length" "16")])
 
@@ -3649,6 +3995,135 @@
   DONE;
 })
 
+;; Named patterns for stack smashing protection.
+(define_expand "stack_protect_set"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")]
+  ""
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  emit_insn ((mode == DImode
+	      ? gen_stack_protect_set_di
+	      : gen_stack_protect_set_si) (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "stack_protect_set_<mode>"
+  [(set (match_operand:PTR 0 "memory_operand" "=m")
+	(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
+	 UNSPEC_SP_SET))
+   (set (match_scratch:PTR 2 "=&r") (const_int 0))]
+  ""
+  "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2,0"
+  [(set_attr "length" "12")
+   (set_attr "type" "multiple")])
+
+(define_expand "stack_protect_test"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")
+   (match_operand 2)]
+  ""
+{
+  rtx result;
+  enum machine_mode mode = GET_MODE (operands[0]);
+
+  result = gen_reg_rtx(mode);
+
+  emit_insn ((mode == DImode
+	      ? gen_stack_protect_test_di
+	      : gen_stack_protect_test_si) (result,
+					    operands[0],
+					    operands[1]));
+
+  if (mode == DImode)
+    emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
+				    result, const0_rtx, operands[2]));
+  else
+    emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
+				    result, const0_rtx, operands[2]));
+  DONE;
+})
+
+(define_insn "stack_protect_test_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=r")
+	(unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
+		     (match_operand:PTR 2 "memory_operand" "m")]
+	 UNSPEC_SP_TEST))
+   (clobber (match_scratch:PTR 3 "=&r"))]
+  ""
+  "ldr\t%<w>3, %x1\;ldr\t%<w>0, %x2\;eor\t%<w>0, %<w>3, %<w>0"
+  [(set_attr "length" "12")
+   (set_attr "type" "multiple")])
+
+;; Write Floating-point Control Register.
+(define_insn "set_fpcr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
+  ""
+  "msr\\tfpcr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Control Register.
+(define_insn "get_fpcr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
+  ""
+  "mrs\\t%0, fpcr"
+  [(set_attr "type" "mrs")])
+
+;; Write Floating-point Status Register.
+(define_insn "set_fpsr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
+  ""
+  "msr\\tfpsr, %0"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status Register.
+(define_insn "get_fpsr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+  ""
+  "mrs\\t%0, fpsr"
+  [(set_attr "type" "mrs")])
+
+
+;; Define the subtract-one-and-jump insns so loop.c
+;; knows what to generate.
+(define_expand "doloop_end"
+  [(use (match_operand 0 "" ""))      ; loop pseudo
+   (use (match_operand 1 "" ""))]     ; label
+  "optimize > 0 && flag_modulo_sched"
+{
+  rtx s0;
+  rtx bcomp;
+  rtx loc_ref;
+  rtx cc_reg;
+  rtx insn;
+  rtx cmp;
+
+  /* Currently SMS relies on the do-loop pattern to recognize loops
+     where (1) the control part consists of all insns defining and/or
+     using a certain 'count' register and (2) the loop count can be
+     adjusted by modifying this register prior to the loop.
+     ??? The possible introduction of a new block to initialize the
+     new IV can potentially affect branch optimizations.  */
+
+  if (GET_MODE (operands[0]) != DImode)
+    FAIL;
+
+  s0 = operands [0];
+  insn = emit_insn (gen_adddi3_compare0 (s0, s0, GEN_INT (-1)));
+
+  cmp = XVECEXP (PATTERN (insn), 0, 0);
+  cc_reg = SET_DEST (cmp);
+  bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
+  loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]);
+  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+			       gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+						     loc_ref, pc_rtx)));
+  DONE;
+})
+
 ;; AdvSIMD Stuff
 (include "aarch64-simd.md")
 
--- a/src/gcc/config/aarch64/t-aarch64
+++ b/src/gcc/config/aarch64/t-aarch64
@@ -31,10 +31,17 @@
   $(SYSTEM_H) coretypes.h $(TM_H) \
   $(RTL_H) $(TREE_H) expr.h $(TM_P_H) $(RECOG_H) langhooks.h \
   $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \
-  $(srcdir)/config/aarch64/aarch64-simd-builtins.def
+  $(srcdir)/config/aarch64/aarch64-simd-builtins.def \
+  aarch64-builtin-iterators.h
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
 		$(srcdir)/config/aarch64/aarch64-builtins.c
 
+aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \
+	$(srcdir)/config/aarch64/iterators.md
+	$(SHELL) $(srcdir)/config/aarch64/geniterators.sh \
+		$(srcdir)/config/aarch64/iterators.md > \
+		aarch64-builtin-iterators.h
+
 aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \
     coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H)
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
--- a/src/gcc/config/aarch64/arm_acle.h
+++ b/src/gcc/config/aarch64/arm_acle.h
@@ -0,0 +1,90 @@
+/* AArch64 Non-NEON ACLE intrinsics include file.
+
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _GCC_ARM_ACLE_H
+#define _GCC_ARM_ACLE_H
+
+#include <stdint.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __ARM_FEATURE_CRC32
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32b (uint32_t __a, uint8_t __b)
+{
+  return __builtin_aarch64_crc32b (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32cb (uint32_t __a, uint8_t __b)
+{
+  return __builtin_aarch64_crc32cb (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32ch (uint32_t __a, uint16_t __b)
+{
+  return __builtin_aarch64_crc32ch (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32cw (uint32_t __a, uint32_t __b)
+{
+  return __builtin_aarch64_crc32cw (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32cd (uint32_t __a, uint64_t __b)
+{
+  return __builtin_aarch64_crc32cx (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32h (uint32_t __a, uint16_t __b)
+{
+  return __builtin_aarch64_crc32h (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32w (uint32_t __a, uint32_t __b)
+{
+  return __builtin_aarch64_crc32w (__a, __b);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+__crc32d (uint32_t __a, uint64_t __b)
+{
+  return __builtin_aarch64_crc32x (__a, __b);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
@@ -0,0 +1,131 @@
+/* RTX cost tables for AArch64.
+
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_AARCH64_COST_TABLES_H
+#define GCC_AARCH64_COST_TABLES_H
+
+#include "config/arm/aarch-cost-tables.h"
+
+/* ThunderX does not have implement AArch32.  */
+const struct cpu_cost_table thunderx_extra_costs =
+{
+  /* ALU */
+  {
+    0,			/* Arith.  */
+    0,			/* Logical.  */
+    0,			/* Shift.  */
+    0,			/* Shift_reg.  */
+    COSTS_N_INSNS (1),	/* Arith_shift.  */
+    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */
+    COSTS_N_INSNS (1),	/* UNUSED: Log_shift.  */
+    COSTS_N_INSNS (1),	/* UNUSED: Log_shift_reg.  */
+    0,			/* Extend.  */
+    COSTS_N_INSNS (1),	/* Extend_arith.  */
+    0,			/* Bfi.  */
+    0,			/* Bfx.  */
+    COSTS_N_INSNS (5),	/* Clz.  */
+    0,			/* rev.  */
+    0,			/* UNUSED: non_exec.  */
+    false		/* UNUSED: non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (3),	/* Simple.  */
+      0,			/* Flag_setting.  */
+      0,			/* Extend.  */
+      0,			/* Add.  */
+      COSTS_N_INSNS (1),	/* Extend_add.  */
+      COSTS_N_INSNS (21)	/* Idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (3),	/* Simple.  */
+      0,			/* Flag_setting.  */
+      0,			/* Extend.  */
+      0,			/* Add.  */
+      COSTS_N_INSNS (1),	/* Extend_add.  */
+      COSTS_N_INSNS (37)	/* Idiv.  */
+    },
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (2),	/* Load.  */
+    COSTS_N_INSNS (2),	/* Load_sign_extend.  */
+    COSTS_N_INSNS (2),	/* Ldrd.  */
+    0,			/* N/A: Ldm_1st.  */
+    0,			/* N/A: Ldm_regs_per_insn_1st.  */
+    0,			/* N/A: Ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (3),	/* Loadf.  */
+    COSTS_N_INSNS (3),	/* Loadd.  */
+    0,  		/* N/A: Load_unaligned.  */
+    0,			/* Store.  */
+    0,			/* Strd.  */
+    0,			/* N/A: Stm_1st.  */
+    0,			/* N/A: Stm_regs_per_insn_1st.  */
+    0,			/* N/A: Stm_regs_per_insn_subsequent.  */
+    0,			/* Storef.  */
+    0,			/* Stored.  */
+    COSTS_N_INSNS (1)  /* Store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (11),	/* Div.  */
+      COSTS_N_INSNS (5),	/* Mult.  */
+      COSTS_N_INSNS (5),	/* Mult_addsub.  */
+      COSTS_N_INSNS (5),	/* Fma.  */
+      COSTS_N_INSNS (3),	/* Addsub.  */
+      0,			/* Fpconst.  */
+      COSTS_N_INSNS (1),	/* Neg.  */
+      0,			/* Compare.  */
+      COSTS_N_INSNS (5),	/* Widen.  */
+      COSTS_N_INSNS (5),	/* Narrow.  */
+      COSTS_N_INSNS (5),	/* Toint.  */
+      COSTS_N_INSNS (5),	/* Fromint.  */
+      COSTS_N_INSNS (1)		/* Roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (21),	/* Div.  */
+      COSTS_N_INSNS (5),	/* Mult.  */
+      COSTS_N_INSNS (5),	/* Mult_addsub.  */
+      COSTS_N_INSNS (5),	/* Fma.  */
+      COSTS_N_INSNS (3),	/* Addsub.  */
+      0,			/* Fpconst.  */
+      COSTS_N_INSNS (1),	/* Neg.  */
+      0,			/* Compare.  */
+      COSTS_N_INSNS (5),	/* Widen.  */
+      COSTS_N_INSNS (5),	/* Narrow.  */
+      COSTS_N_INSNS (5),	/* Toint.  */
+      COSTS_N_INSNS (5),	/* Fromint.  */
+      COSTS_N_INSNS (1)		/* Roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)	/* Alu.  */
+  }
+};
+
+
+
+#endif
+
--- a/src/gcc/config/aarch64/aarch64-cores.def
+++ b/src/gcc/config/aarch64/aarch64-cores.def
@@ -34,9 +34,13 @@
 
 /* V8 Architecture Processors.  */
 
-AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53)
-AARCH64_CORE("cortex-a57",  cortexa15, cortexa15, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
+AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53)
+AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
+AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
+AARCH64_CORE("thunderx",    thunderx,  thunderx, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
+AARCH64_CORE("xgene1",      xgene1,    xgene1,    8,  AARCH64_FL_FOR_ARCH8, xgene1)
 
 /* V8 big.LITTLE implementations.  */
 
-AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
+AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
+AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
--- a/src/gcc/config/aarch64/aarch64-tune.md
+++ b/src/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa53,cortexa15,cortexa57cortexa53"
+	"cortexa53,cortexa57,cortexa72,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
--- a/src/gcc/config/aarch64/aarch64-builtins.c
+++ b/src/gcc/config/aarch64/aarch64-builtins.c
@@ -47,52 +47,27 @@
 #include "gimple.h"
 #include "gimple-iterator.h"
 
-enum aarch64_simd_builtin_type_mode
-{
-  T_V8QI,
-  T_V4HI,
-  T_V2SI,
-  T_V2SF,
-  T_DI,
-  T_DF,
-  T_V16QI,
-  T_V8HI,
-  T_V4SI,
-  T_V4SF,
-  T_V2DI,
-  T_V2DF,
-  T_TI,
-  T_EI,
-  T_OI,
-  T_XI,
-  T_SI,
-  T_SF,
-  T_HI,
-  T_QI,
-  T_MAX
-};
-
-#define v8qi_UP  T_V8QI
-#define v4hi_UP  T_V4HI
-#define v2si_UP  T_V2SI
-#define v2sf_UP  T_V2SF
-#define di_UP    T_DI
-#define df_UP    T_DF
-#define v16qi_UP T_V16QI
-#define v8hi_UP  T_V8HI
-#define v4si_UP  T_V4SI
-#define v4sf_UP  T_V4SF
-#define v2di_UP  T_V2DI
-#define v2df_UP  T_V2DF
-#define ti_UP	 T_TI
-#define ei_UP	 T_EI
-#define oi_UP	 T_OI
-#define xi_UP	 T_XI
-#define si_UP    T_SI
-#define sf_UP    T_SF
-#define hi_UP    T_HI
-#define qi_UP    T_QI
-
+#define v8qi_UP  V8QImode
+#define v4hi_UP  V4HImode
+#define v2si_UP  V2SImode
+#define v2sf_UP  V2SFmode
+#define di_UP    DImode
+#define df_UP    DFmode
+#define v16qi_UP V16QImode
+#define v8hi_UP  V8HImode
+#define v4si_UP  V4SImode
+#define v4sf_UP  V4SFmode
+#define v2di_UP  V2DImode
+#define v2df_UP  V2DFmode
+#define ti_UP	 TImode
+#define ei_UP	 EImode
+#define oi_UP	 OImode
+#define ci_UP	 CImode
+#define xi_UP	 XImode
+#define si_UP    SImode
+#define sf_UP    SFmode
+#define hi_UP    HImode
+#define qi_UP    QImode
 #define UP(X) X##_UP
 
 #define SIMD_MAX_BUILTIN_ARGS 5
@@ -107,8 +82,6 @@
   qualifier_const = 0x2, /* 1 << 1  */
   /* T *foo.  */
   qualifier_pointer = 0x4, /* 1 << 2  */
-  /* const T *foo.  */
-  qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer  */
   /* Used when expanding arguments if an operand could
      be an immediate.  */
   qualifier_immediate = 0x8, /* 1 << 3  */
@@ -123,7 +96,7 @@
   qualifier_map_mode = 0x80, /* 1 << 7  */
   /* qualifier_pointer | qualifier_map_mode  */
   qualifier_pointer_map_mode = 0x84,
-  /* qualifier_const_pointer | qualifier_map_mode  */
+  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
   qualifier_const_pointer_map_mode = 0x86,
   /* Polynomial types.  */
   qualifier_poly = 0x100
@@ -132,7 +105,7 @@
 typedef struct
 {
   const char *name;
-  enum aarch64_simd_builtin_type_mode mode;
+  enum machine_mode mode;
   const enum insn_code code;
   unsigned int fcode;
   enum aarch64_type_qualifiers *qualifiers;
@@ -147,16 +120,49 @@
   = { qualifier_unsigned, qualifier_unsigned };
 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
 #define TYPES_CREATE (aarch64_types_unop_qualifiers)
-#define TYPES_REINTERP (aarch64_types_unop_qualifiers)
+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_unsigned };
+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_poly };
+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none };
+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_poly, qualifier_none };
+#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_internal, qualifier_internal };
+#define TYPES_TST (aarch64_types_cmtst_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_void, qualifier_none, qualifier_none };
+#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
+#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned };
+#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_poly, qualifier_poly, qualifier_poly };
 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
@@ -172,10 +178,10 @@
 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
 
 static enum aarch64_type_qualifiers
-aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none,
-      qualifier_none, qualifier_none };
-#define TYPES_QUADOP (aarch64_types_quadop_qualifiers)
+      qualifier_none, qualifier_immediate };
+#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
 
 static enum aarch64_type_qualifiers
 aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -183,9 +189,14 @@
 #define TYPES_GETLANE (aarch64_types_getlane_qualifiers)
 #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
+#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+
 static enum aarch64_type_qualifiers
 aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
@@ -194,6 +205,13 @@
 #define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers)
 
 static enum aarch64_type_qualifiers
+aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
+      qualifier_immediate };
+#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
+
+
+static enum aarch64_type_qualifiers
 aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none };
 #define TYPES_COMBINE (aarch64_types_combine_qualifiers)
@@ -203,6 +221,11 @@
   = { qualifier_none, qualifier_const_pointer_map_mode };
 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_const_pointer_map_mode,
+      qualifier_none, qualifier_none };
+#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
 
 static enum aarch64_type_qualifiers
 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -230,6 +253,11 @@
   = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_void, qualifier_pointer_map_mode,
+      qualifier_none, qualifier_none };
+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
 
 #define CF0(N, X) CODE_FOR_aarch64_##N##X
 #define CF1(N, X) CODE_FOR_##N##X##1
@@ -239,7 +267,7 @@
 #define CF10(N, X) CODE_FOR_##N##X
 
 #define VAR1(T, N, MAP, A) \
-  {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T},
+  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
 #define VAR2(T, N, MAP, A, B) \
   VAR1 (T, N, MAP, A) \
   VAR1 (T, N, MAP, B)
@@ -274,96 +302,34 @@
   VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
   VAR1 (T, N, MAP, L)
 
-/* BUILTIN_<ITERATOR> macros should expand to cover the same range of
-   modes as is given for each define_mode_iterator in
-   config/aarch64/iterators.md.  */
+#include "aarch64-builtin-iterators.h"
 
-#define BUILTIN_DX(T, N, MAP) \
-  VAR2 (T, N, MAP, di, df)
-#define BUILTIN_GPF(T, N, MAP) \
-  VAR2 (T, N, MAP, sf, df)
-#define BUILTIN_SDQ_I(T, N, MAP) \
-  VAR4 (T, N, MAP, qi, hi, si, di)
-#define BUILTIN_SD_HSI(T, N, MAP) \
-  VAR2 (T, N, MAP, hi, si)
-#define BUILTIN_V2F(T, N, MAP) \
-  VAR2 (T, N, MAP, v2sf, v2df)
-#define BUILTIN_VALL(T, N, MAP) \
-  VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
-	 v4si, v2di, v2sf, v4sf, v2df)
-#define BUILTIN_VALLDI(T, N, MAP) \
-  VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
-	 v4si, v2di, v2sf, v4sf, v2df, di)
-#define BUILTIN_VALLDIF(T, N, MAP) \
-  VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
-	 v4si, v2di, v2sf, v4sf, v2df, di, df)
-#define BUILTIN_VB(T, N, MAP) \
-  VAR2 (T, N, MAP, v8qi, v16qi)
-#define BUILTIN_VD(T, N, MAP) \
-  VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
-#define BUILTIN_VDC(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
-#define BUILTIN_VDIC(T, N, MAP) \
-  VAR3 (T, N, MAP, v8qi, v4hi, v2si)
-#define BUILTIN_VDN(T, N, MAP) \
-  VAR3 (T, N, MAP, v4hi, v2si, di)
-#define BUILTIN_VDQ(T, N, MAP) \
-  VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
-#define BUILTIN_VDQF(T, N, MAP) \
-  VAR3 (T, N, MAP, v2sf, v4sf, v2df)
-#define BUILTIN_VDQH(T, N, MAP) \
-  VAR2 (T, N, MAP, v4hi, v8hi)
-#define BUILTIN_VDQHS(T, N, MAP) \
-  VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si)
-#define BUILTIN_VDQIF(T, N, MAP) \
-  VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
-#define BUILTIN_VDQM(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
-#define BUILTIN_VDQV(T, N, MAP) \
-  VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si)
-#define BUILTIN_VDQQH(T, N, MAP) \
-  VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi)
-#define BUILTIN_VDQ_BHSI(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
-#define BUILTIN_VDQ_I(T, N, MAP) \
-  VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
-#define BUILTIN_VDW(T, N, MAP) \
-  VAR3 (T, N, MAP, v8qi, v4hi, v2si)
-#define BUILTIN_VD_BHSI(T, N, MAP) \
-  VAR3 (T, N, MAP, v8qi, v4hi, v2si)
-#define BUILTIN_VD_HSI(T, N, MAP) \
-  VAR2 (T, N, MAP, v4hi, v2si)
-#define BUILTIN_VD_RE(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
-#define BUILTIN_VQ(T, N, MAP) \
-  VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
-#define BUILTIN_VQN(T, N, MAP) \
-  VAR3 (T, N, MAP, v8hi, v4si, v2di)
-#define BUILTIN_VQW(T, N, MAP) \
-  VAR3 (T, N, MAP, v16qi, v8hi, v4si)
-#define BUILTIN_VQ_HSI(T, N, MAP) \
-  VAR2 (T, N, MAP, v8hi, v4si)
-#define BUILTIN_VQ_S(T, N, MAP) \
-  VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
-#define BUILTIN_VSDQ_HSI(T, N, MAP) \
-  VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si)
-#define BUILTIN_VSDQ_I(T, N, MAP) \
-  VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
-#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \
-  VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
-#define BUILTIN_VSDQ_I_DI(T, N, MAP) \
-  VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
-#define BUILTIN_VSD_HSI(T, N, MAP) \
-  VAR4 (T, N, MAP, v4hi, v2si, hi, si)
-#define BUILTIN_VSQN_HSDI(T, N, MAP) \
-  VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di)
-#define BUILTIN_VSTRUCT(T, N, MAP) \
-  VAR3 (T, N, MAP, oi, ci, xi)
-
 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
 #include "aarch64-simd-builtins.def"
 };
 
+/* There's only 8 CRC32 builtins.  Probably not worth their own .def file.  */
+#define AARCH64_CRC32_BUILTINS \
+  CRC32_BUILTIN (crc32b, QI) \
+  CRC32_BUILTIN (crc32h, HI) \
+  CRC32_BUILTIN (crc32w, SI) \
+  CRC32_BUILTIN (crc32x, DI) \
+  CRC32_BUILTIN (crc32cb, QI) \
+  CRC32_BUILTIN (crc32ch, HI) \
+  CRC32_BUILTIN (crc32cw, SI) \
+  CRC32_BUILTIN (crc32cx, DI)
+
+typedef struct
+{
+  const char *name;
+  enum machine_mode mode;
+  const enum insn_code icode;
+  unsigned int fcode;
+} aarch64_crc_builtin_datum;
+
+#define CRC32_BUILTIN(N, M) \
+  AARCH64_BUILTIN_##N,
+
 #undef VAR1
 #define VAR1(T, N, MAP, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -371,13 +337,32 @@
 enum aarch64_builtins
 {
   AARCH64_BUILTIN_MIN,
+
+  AARCH64_BUILTIN_GET_FPCR,
+  AARCH64_BUILTIN_SET_FPCR,
+  AARCH64_BUILTIN_GET_FPSR,
+  AARCH64_BUILTIN_SET_FPSR,
+
   AARCH64_SIMD_BUILTIN_BASE,
 #include "aarch64-simd-builtins.def"
   AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
 			      + ARRAY_SIZE (aarch64_simd_builtin_data),
+  AARCH64_CRC32_BUILTIN_BASE,
+  AARCH64_CRC32_BUILTINS
+  AARCH64_CRC32_BUILTIN_MAX,
   AARCH64_BUILTIN_MAX
 };
 
+#undef CRC32_BUILTIN
+#define CRC32_BUILTIN(N, M) \
+  {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
+
+static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
+  AARCH64_CRC32_BUILTINS
+};
+
+#undef CRC32_BUILTIN
+
 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
 
 #define NUM_DREG_TYPES 6
@@ -639,25 +624,10 @@
       bool print_type_signature_p = false;
       char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 };
       aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
-      const char *const modenames[] =
-	{
-	  "v8qi", "v4hi", "v2si", "v2sf", "di", "df",
-	  "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
-	  "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
-	};
-      const enum machine_mode modes[] =
-	{
-	  V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode,
-	  V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode,
-	  V2DFmode, TImode, EImode, OImode, XImode, SImode,
-	  SFmode, HImode, QImode
-	};
       char namebuf[60];
       tree ftype = NULL;
       tree fndecl = NULL;
 
-      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
-
       d->fcode = fcode;
 
       /* We must track two variables here.  op_num is
@@ -705,7 +675,7 @@
 	  /* Some builtins have different user-facing types
 	     for certain arguments, encoded in d->mode.  */
 	  if (qualifiers & qualifier_map_mode)
-	      op_mode = modes[d->mode];
+	      op_mode = d->mode;
 
 	  /* For pointers, we want a pointer to the basic type
 	     of the vector.  */
@@ -737,11 +707,11 @@
       gcc_assert (ftype != NULL);
 
       if (print_type_signature_p)
-	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s",
-		  d->name, modenames[d->mode], type_signature);
+	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
+		  d->name, type_signature);
       else
-	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s",
-		  d->name, modenames[d->mode]);
+	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
+		  d->name);
 
       fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD,
 				     NULL, NULL_TREE);
@@ -749,11 +719,49 @@
     }
 }
 
+static void
+aarch64_init_crc32_builtins ()
+{
+  tree usi_type = aarch64_build_unsigned_type (SImode);
+  unsigned int i = 0;
+
+  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
+    {
+      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
+      tree argtype = aarch64_build_unsigned_type (d->mode);
+      tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
+      tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
+                                          BUILT_IN_MD, NULL, NULL_TREE);
+
+      aarch64_builtin_decls[d->fcode] = fndecl;
+    }
+}
+
 void
 aarch64_init_builtins (void)
 {
+  tree ftype_set_fpr
+    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
+  tree ftype_get_fpr
+    = build_function_type_list (unsigned_type_node, NULL);
+
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
+    = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
+			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
+    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
+			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
+  if (TARGET_CRC32)
+    aarch64_init_crc32_builtins ();
 }
 
 tree
@@ -774,9 +782,8 @@
 
 static rtx
 aarch64_simd_expand_args (rtx target, int icode, int have_retval,
-			  tree exp, ...)
+			  tree exp, builtin_simd_arg *args)
 {
-  va_list ap;
   rtx pat;
   tree arg[SIMD_MAX_BUILTIN_ARGS];
   rtx op[SIMD_MAX_BUILTIN_ARGS];
@@ -790,11 +797,9 @@
 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
     target = gen_reg_rtx (tmode);
 
-  va_start (ap, exp);
-
   for (;;)
     {
-      builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int);
+      builtin_simd_arg thisarg = args[argc];
 
       if (thisarg == SIMD_ARG_STOP)
 	break;
@@ -818,8 +823,11 @@
 	    case SIMD_ARG_CONSTANT:
 	      if (!(*insn_data[icode].operand[argc + have_retval].predicate)
 		  (op[argc], mode[argc]))
+	      {
 		error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, "
 		       "expected %<const int%>", argc + 1);
+		return const0_rtx;
+	      }
 	      break;
 
 	    case SIMD_ARG_STOP:
@@ -830,8 +838,6 @@
 	}
     }
 
-  va_end (ap);
-
   if (have_retval)
     switch (argc)
       {
@@ -886,7 +892,7 @@
       }
 
   if (!pat)
-    return 0;
+    return NULL_RTX;
 
   emit_insn (pat);
 
@@ -945,14 +951,45 @@
   /* The interface to aarch64_simd_expand_args expects a 0 if
      the function is void, and a 1 if it is not.  */
   return aarch64_simd_expand_args
-	  (target, icode, !is_void, exp,
-	   args[1],
-	   args[2],
-	   args[3],
-	   args[4],
-	   SIMD_ARG_STOP);
+	  (target, icode, !is_void, exp, &args[1]);
 }
 
+rtx
+aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
+{
+  rtx pat;
+  aarch64_crc_builtin_datum *d
+    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
+  enum insn_code icode = d->icode;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+  enum machine_mode tmode = insn_data[icode].operand[0].mode;
+  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+  if (! target
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
+	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
+
+  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+    op1 = copy_to_mode_reg (mode1, op1);
+
+  pat = GEN_FCN (icode) (target, op0, op1);
+  if (!pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient.  */
 rtx
@@ -964,11 +1001,43 @@
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   int fcode = DECL_FUNCTION_CODE (fndecl);
+  int icode;
+  rtx pat, op0;
+  tree arg0;
 
-  if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
+  switch (fcode)
+    {
+    case AARCH64_BUILTIN_GET_FPCR:
+    case AARCH64_BUILTIN_SET_FPCR:
+    case AARCH64_BUILTIN_GET_FPSR:
+    case AARCH64_BUILTIN_SET_FPSR:
+      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
+	  || (fcode == AARCH64_BUILTIN_GET_FPSR))
+	{
+	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
+	    CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
+	  target = gen_reg_rtx (SImode);
+	  pat = GEN_FCN (icode) (target);
+	}
+      else
+	{
+	  target = NULL_RTX;
+	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+	    CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
+	  arg0 = CALL_EXPR_ARG (exp, 0);
+	  op0 = expand_normal (arg0);
+	  pat = GEN_FCN (icode) (op0);
+	}
+      emit_insn (pat);
+      return target;
+    }
+
+  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
     return aarch64_simd_expand_builtin (fcode, exp, target);
+  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
+    return aarch64_crc32_expand_builtin (fcode, exp, target);
 
-  return NULL_RTX;
+  gcc_unreachable ();
 }
 
 tree
@@ -1033,6 +1102,14 @@
               return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
             return NULL_TREE;
           }
+	case BUILT_IN_CTZ:
+          {
+	    if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+	      return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
+	    else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+	      return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
+	    return NULL_TREE;
+          }
 #undef AARCH64_CHECK_BUILTIN_MODE
 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
   (out_mode == N##Imode && out_n == C \
@@ -1086,7 +1163,29 @@
 
 	    return aarch64_builtin_decls[builtin];
 	  }
-
+	case BUILT_IN_BSWAP16:
+#undef AARCH64_CHECK_BUILTIN_MODE
+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
+  (out_mode == N##Imode && out_n == C \
+   && in_mode == N##Imode && in_n == C)
+	  if (AARCH64_CHECK_BUILTIN_MODE (4, H))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
+	  else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
+	  else
+	    return NULL_TREE;
+	case BUILT_IN_BSWAP32:
+	  if (AARCH64_CHECK_BUILTIN_MODE (2, S))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
+	  else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
+	  else
+	    return NULL_TREE;
+	case BUILT_IN_BSWAP64:
+	  if (AARCH64_CHECK_BUILTIN_MODE (2, D))
+	    return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
+	  else
+	    return NULL_TREE;
 	default:
 	  return NULL_TREE;
       }
@@ -1108,25 +1207,28 @@
 
   switch (fcode)
     {
-      BUILTIN_VALLDI (UNOP, abs, 2)
+      BUILTIN_VDQF (UNOP, abs, 2)
 	return fold_build1 (ABS_EXPR, type, args[0]);
 	break;
-      BUILTIN_VALLDI (BINOP, cmge, 0)
-	return fold_build2 (GE_EXPR, type, args[0], args[1]);
-	break;
-      BUILTIN_VALLDI (BINOP, cmgt, 0)
-	return fold_build2 (GT_EXPR, type, args[0], args[1]);
-	break;
-      BUILTIN_VALLDI (BINOP, cmeq, 0)
-	return fold_build2 (EQ_EXPR, type, args[0], args[1]);
-	break;
-      BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
-	{
-	  tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
-	  tree vec_zero_node = build_zero_cst (type);
-	  return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
-	  break;
-	}
+      VAR1 (REINTERP_SS, reinterpretdi, 0, df)
+      VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
+      VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
+      VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
+      VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
+      BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
+      BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
+      VAR1 (REINTERP_US, reinterpretdi, 0, df)
+      VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
+      VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
+      VAR1 (REINTERP_US, reinterpretv2si, 0, df)
+      VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
+      BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
+      VAR1 (REINTERP_PS, reinterpretdi, 0, df)
+      VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
+      VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
+      VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
+      VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
+	return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
       VAR1 (UNOP, floatv2si, 2, v2sf)
       VAR1 (UNOP, floatv4si, 2, v4sf)
       VAR1 (UNOP, floatv2di, 2, v2df)
@@ -1146,6 +1248,7 @@
   tree call = gimple_call_fn (stmt);
   tree fndecl;
   gimple new_stmt = NULL;
+
   if (call)
     {
       fndecl = gimple_call_fndecl (stmt);
@@ -1157,16 +1260,20 @@
 			? gimple_call_arg_ptr (stmt, 0)
 			: &error_mark_node);
 
+	  /* We use gimple's REDUC_(PLUS|MIN|MAX)_EXPRs for float, signed int
+	     and unsigned int; it will distinguish according to the types of
+	     the arguments to the __builtin.  */
 	  switch (fcode)
 	    {
-	      BUILTIN_VALL (UNOP, reduc_splus_, 10)
-		new_stmt = gimple_build_assign_with_ops (
+	      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
+	        new_stmt = gimple_build_assign_with_ops (
 						REDUC_PLUS_EXPR,
 						gimple_call_lhs (stmt),
 						args[0],
 						NULL_TREE);
 		break;
-	      BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
+	      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
+	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
 		new_stmt = gimple_build_assign_with_ops (
 						REDUC_MAX_EXPR,
 						gimple_call_lhs (stmt),
@@ -1173,7 +1280,8 @@
 						args[0],
 						NULL_TREE);
 		break;
-	      BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
+	      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
 		new_stmt = gimple_build_assign_with_ops (
 						REDUC_MIN_EXPR,
 						gimple_call_lhs (stmt),
@@ -1196,43 +1304,108 @@
   return changed;
 }
 
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned AARCH64_FE_INVALID = 1;
+  const unsigned AARCH64_FE_DIVBYZERO = 2;
+  const unsigned AARCH64_FE_OVERFLOW = 4;
+  const unsigned AARCH64_FE_UNDERFLOW = 8;
+  const unsigned AARCH64_FE_INEXACT = 16;
+  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
+							| AARCH64_FE_DIVBYZERO
+							| AARCH64_FE_OVERFLOW
+							| AARCH64_FE_UNDERFLOW
+							| AARCH64_FE_INEXACT);
+  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
+  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
+  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
+  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
+  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
+
+  /* Generate the equivalence of :
+       unsigned int fenv_cr;
+       fenv_cr = __builtin_aarch64_get_fpcr ();
+
+       unsigned int fenv_sr;
+       fenv_sr = __builtin_aarch64_get_fpsr ();
+
+       Now set all exceptions to non-stop
+       unsigned int mask_cr
+		= ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
+       unsigned int masked_cr;
+       masked_cr = fenv_cr & mask_cr;
+
+       And clear all exception flags
+       unsigned int maske_sr = ~AARCH64_FE_ALL_EXCEPT;
+       unsigned int masked_cr;
+       masked_sr = fenv_sr & mask_sr;
+
+       __builtin_aarch64_set_cr (masked_cr);
+       __builtin_aarch64_set_sr (masked_sr);  */
+
+  fenv_cr = create_tmp_var (unsigned_type_node, NULL);
+  fenv_sr = create_tmp_var (unsigned_type_node, NULL);
+
+  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
+  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
+  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
+  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
+
+  mask_cr = build_int_cst (unsigned_type_node,
+			   ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
+  mask_sr = build_int_cst (unsigned_type_node,
+			   ~(AARCH64_FE_ALL_EXCEPT));
+
+  ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_cr, build_call_expr (get_fpcr, 0));
+  ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_sr, build_call_expr (get_fpsr, 0));
+
+  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
+  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
+
+  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
+  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
+			hold_fnclex_sr);
+  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
+			masked_fenv_sr);
+  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
+
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */
+
+  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+  /* Generate the equivalent of :
+       unsigned int new_fenv_var;
+       new_fenv_var = __builtin_aarch64_get_fpsr ();
+
+       __builtin_aarch64_set_fpsr (fenv_sr);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
+			new_fenv_var, build_call_expr (get_fpsr, 0));
+  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call = build_call_expr (atomic_feraiseexcept, 1,
+				 fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
+
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
-#undef BUILTIN_DX
-#undef BUILTIN_SDQ_I
-#undef BUILTIN_SD_HSI
-#undef BUILTIN_V2F
-#undef BUILTIN_VALL
-#undef BUILTIN_VB
-#undef BUILTIN_VD
-#undef BUILTIN_VDC
-#undef BUILTIN_VDIC
-#undef BUILTIN_VDN
-#undef BUILTIN_VDQ
-#undef BUILTIN_VDQF
-#undef BUILTIN_VDQH
-#undef BUILTIN_VDQHS
-#undef BUILTIN_VDQIF
-#undef BUILTIN_VDQM
-#undef BUILTIN_VDQV
-#undef BUILTIN_VDQ_BHSI
-#undef BUILTIN_VDQ_I
-#undef BUILTIN_VDW
-#undef BUILTIN_VD_BHSI
-#undef BUILTIN_VD_HSI
-#undef BUILTIN_VD_RE
-#undef BUILTIN_VQ
-#undef BUILTIN_VQN
-#undef BUILTIN_VQW
-#undef BUILTIN_VQ_HSI
-#undef BUILTIN_VQ_S
-#undef BUILTIN_VSDQ_HSI
-#undef BUILTIN_VSDQ_I
-#undef BUILTIN_VSDQ_I_BHSI
-#undef BUILTIN_VSDQ_I_DI
-#undef BUILTIN_VSD_HSI
-#undef BUILTIN_VSQN_HSDI
-#undef BUILTIN_VSTRUCT
 #undef CF0
 #undef CF1
 #undef CF2
@@ -1251,3 +1424,4 @@
 #undef VAR10
 #undef VAR11
 
+#include "gt-aarch64-builtins.h"
--- a/src/gcc/config/aarch64/thunderx.md
+++ b/src/gcc/config/aarch64/thunderx.md
@@ -0,0 +1,260 @@
+;; Cavium ThunderX pipeline description
+;; Copyright (C) 2014 Free Software Foundation, Inc.
+;;
+;; Written by Andrew Pinski  <apinski@cavium.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+;;   Copyright (C) 2004, 2005, 2006 Cavium Networks.
+
+
+;; Thunder is a dual-issue processor that can issue all instructions on
+;; pipe0 and a subset on pipe1.
+
+
+(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")
+
+(define_cpu_unit "thunderx_pipe0" "thunderx_main")
+(define_cpu_unit "thunderx_pipe1" "thunderx_main")
+(define_cpu_unit "thunderx_mult" "thunderx_mult")
+(define_cpu_unit "thunderx_divide" "thunderx_divide")
+(define_cpu_unit "thunderx_simd" "thunderx_simd")
+
+(define_insn_reservation "thunderx_add" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_reg,alus_imm,alus_reg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
+  "thunderx_pipe0 | thunderx_pipe1")
+
+(define_insn_reservation "thunderx_shift" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
+  "thunderx_pipe0 | thunderx_pipe1")
+
+
+;; Arthimentic instructions with an extra shift or extend is two cycles.
+;; FIXME: This needs more attributes on aarch64 than what is currently there;
+;;    this is conserative for now.
+;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
+;; Except this is not correct as this is only for !(zero extend)
+
+(define_insn_reservation "thunderx_arith_shift" 2
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
+  "thunderx_pipe0 | thunderx_pipe1")
+
+(define_insn_reservation "thunderx_csel" 2
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "csel"))
+  "thunderx_pipe0 | thunderx_pipe1")
+
+;; Multiply and mulitply accumulate and count leading zeros can only happen on pipe 1
+
+(define_insn_reservation "thunderx_mul" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
+  "thunderx_pipe1 + thunderx_mult")
+
+;; Multiply high instructions take an extra cycle and cause the muliply unit to
+;; be busy for an extra cycle.
+
+;(define_insn_reservation "thunderx_mul_high" 5
+;  (and (eq_attr "tune" "thunderx")
+;       (eq_attr "type" "smull,umull"))
+;  "thunderx_pipe1 + thunderx_mult")
+
+(define_insn_reservation "thunderx_div32" 22
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "udiv,sdiv"))
+  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
+
+;(define_insn_reservation "thunderx_div64" 38
+;  (and (eq_attr "tune" "thunderx")
+;       (eq_attr "type" "udiv,sdiv")
+;       (eq_attr "mode" "DI"))
+;  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")
+
+;; Stores take one cycle in pipe 0
+(define_insn_reservation "thunderx_store" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "store1"))
+  "thunderx_pipe0")
+
+;; Store pair are single issued
+(define_insn_reservation "thunderx_storepair" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "store2"))
+  "thunderx_pipe0 + thunderx_pipe1")
+
+
+;; loads (and load pairs) from L1 take 3 cycles in pipe 0
+(define_insn_reservation "thunderx_load" 3
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "load1, load2"))
+  "thunderx_pipe0")
+
+(define_insn_reservation "thunderx_brj" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "branch,trap,call"))
+  "thunderx_pipe1")
+
+;; FPU
+
+(define_insn_reservation "thunderx_fadd" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "faddd,fadds"))
+  "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fconst" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fconsts,fconstd"))
+  "thunderx_pipe1")
+
+;; Moves between fp are 2 cycles including min/max/select/abs/neg
+(define_insn_reservation "thunderx_fmov" 2
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
+  "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fmovgpr" 2
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "f_mrc, f_mcr"))
+  "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fmul" 6
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
+  "thunderx_pipe1")
+
+(define_insn_reservation "thunderx_fdivs" 12
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fdivs"))
+  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
+
+(define_insn_reservation "thunderx_fdivd" 22
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fdivd"))
+  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
+
+(define_insn_reservation "thunderx_fsqrts" 17
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fsqrts"))
+  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
+
+(define_insn_reservation "thunderx_fsqrtd" 28
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "fsqrtd"))
+  "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
+
+;; The rounding conversion inside fp is 4 cycles
+(define_insn_reservation "thunderx_frint" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "f_rints,f_rintd"))
+  "thunderx_pipe1")
+
+;; Float to integer with a move from int to/from float is 6 cycles
+(define_insn_reservation "thunderx_f_cvt" 6
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
+  "thunderx_pipe1")
+
+;; FP/SIMD load/stores happen in pipe 0
+;; 64bit Loads register/pairs are 4 cycles from L1
+(define_insn_reservation "thunderx_64simd_fp_load" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
+			neon_load1_1reg_q,neon_load1_2reg"))
+  "thunderx_pipe0")
+
+;; 128bit load pair is singled issue and 4 cycles from L1
+(define_insn_reservation "thunderx_128simd_pair_load" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_load1_2reg_q"))
+  "thunderx_pipe0+thunderx_pipe1")
+
+;; FP/SIMD Stores takes one cycle in pipe 0
+(define_insn_reservation "thunderx_simd_fp_store" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
+  "thunderx_pipe0")
+
+;; 64bit neon store pairs are single issue for one cycle
+(define_insn_reservation "thunderx_64neon_storepair" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_store1_2reg"))
+  "thunderx_pipe0 + thunderx_pipe1")
+
+;; 128bit neon store pair are single issued for two cycles
+(define_insn_reservation "thunderx_128neon_storepair" 2
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_store1_2reg_q"))
+  "(thunderx_pipe0 + thunderx_pipe1)*2")
+
+
+;; SIMD/NEON (q forms take an extra cycle)
+
+;; Thunder simd move instruction types - 2/3 cycles
+(define_insn_reservation "thunderx_neon_move" 2
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
+			neon_fp_compare_d, neon_move"))
+  "thunderx_pipe1 + thunderx_simd")
+
+(define_insn_reservation "thunderx_neon_move_q" 3
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
+			neon_fp_compare_d_q, neon_move_q"))
+  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; Thunder simd simple/add instruction types - 4/5 cycles
+
+(define_insn_reservation "thunderx_neon_add" 4
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
+			neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
+			neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
+			neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
+			neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
+			neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
+  "thunderx_pipe1 + thunderx_simd")
+
+;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect
+
+(define_insn_reservation "thunderx_neon_add_q" 5
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
+			neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
+			neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
+			neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
+			neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
+			neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
+			neon_add_long, neon_sub_long"))
+  "thunderx_pipe1 + thunderx_simd, thunderx_simd")
+
+
+;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle
+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
+
+;; Assume both pipes are needed for unknown and multiple-instruction
+;; patterns.
+
+(define_insn_reservation "thunderx_unknown" 1
+  (and (eq_attr "tune" "thunderx")
+       (eq_attr "type" "untyped,multiple"))
+  "thunderx_pipe0 + thunderx_pipe1")
+
+
--- a/src/gcc/config/aarch64/aarch64-opts.h
+++ b/src/gcc/config/aarch64/aarch64-opts.h
@@ -25,7 +25,7 @@
 /* The various cores that implement AArch64.  */
 enum aarch64_processor
 {
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \
   INTERNAL_IDENT,
 #include "aarch64-cores.def"
 #undef AARCH64_CORE
--- a/src/gcc/config/aarch64/aarch64-protos.h
+++ b/src/gcc/config/aarch64/aarch64-protos.h
@@ -108,9 +108,22 @@
    cost models and vectors for address cost calculations, register
    move costs and memory move costs.  */
 
+/* Scaled addressing modes can vary cost depending on the mode of the
+   value to be loaded/stored.  QImode values cannot use scaled
+   addressing modes.  */
+
+struct scale_addr_mode_cost
+{
+  const int hi;
+  const int si;
+  const int di;
+  const int ti;
+};
+
 /* Additional cost for addresses.  */
 struct cpu_addrcost_table
 {
+  const struct scale_addr_mode_cost addr_scale_costs;
   const int pre_modify;
   const int post_modify;
   const int register_offset;
@@ -157,9 +170,14 @@
   const struct cpu_vector_cost *const vec_costs;
   const int memmov_cost;
   const int issue_rate;
+  const unsigned int fuseable_ops;
+  const int int_reassoc_width;
+  const int fp_reassoc_width;
+  const int vec_reassoc_width;
 };
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
+int aarch64_get_condition_code (rtx);
 bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
 bool aarch64_cannot_change_mode_class (enum machine_mode,
 				       enum machine_mode,
@@ -166,7 +184,9 @@
 				       enum reg_class);
 enum aarch64_symbol_type
 aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
+bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
 bool aarch64_constant_address_p (rtx);
+bool aarch64_expand_movmem (rtx *);
 bool aarch64_float_const_zero_rtx_p (rtx);
 bool aarch64_function_arg_regno_p (unsigned);
 bool aarch64_gen_movmemqi (rtx *);
@@ -175,9 +195,12 @@
 bool aarch64_is_long_call_p (rtx);
 bool aarch64_label_mentioned_p (rtx);
 bool aarch64_legitimate_pic_operand_p (rtx);
+bool aarch64_modes_tieable_p (enum machine_mode mode1,
+			      enum machine_mode mode2);
 bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
 bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
 			    enum machine_mode);
+bool aarch64_offset_7bit_signed_scaled_p (enum machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
 char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
 bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
@@ -184,6 +207,8 @@
 bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
 bool aarch64_regno_ok_for_index_p (int, bool);
+bool aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
+					    bool high);
 bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
 bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
@@ -200,6 +225,8 @@
 enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
+enum machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
+						       enum machine_mode);
 int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode);
 int aarch64_hard_regno_nregs (unsigned, enum machine_mode);
 int aarch64_simd_attr_length_move (rtx);
@@ -231,7 +258,6 @@
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
 
-void aarch64_simd_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int);
 
 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
@@ -291,4 +317,5 @@
 extern void aarch64_final_prescan_insn (rtx);
 extern bool
 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 #endif /* GCC_AARCH64_PROTOS_H */
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -46,37 +46,46 @@
   BUILTIN_VD_BHSI (BINOP, addp, 0)
   VAR1 (UNOP, addp, 0, di)
   BUILTIN_VDQ_BHSI (UNOP, clz, 2)
+  BUILTIN_VS (UNOP, ctz, 2)
 
-  BUILTIN_VALL (GETLANE, get_lane, 0)
-  VAR1 (GETLANE, get_lane, 0, di)
   BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
 
-  BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
-  BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
-  BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
-  BUILTIN_VDC (REINTERP, reinterpretv2si, 0)
-  BUILTIN_VDC (REINTERP, reinterpretv2sf, 0)
-  BUILTIN_VQ (REINTERP, reinterpretv16qi, 0)
-  BUILTIN_VQ (REINTERP, reinterpretv8hi, 0)
-  BUILTIN_VQ (REINTERP, reinterpretv4si, 0)
-  BUILTIN_VQ (REINTERP, reinterpretv4sf, 0)
-  BUILTIN_VQ (REINTERP, reinterpretv2di, 0)
-  BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
+  VAR1 (REINTERP_SS, reinterpretdi, 0, df)
+  VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
+  VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
+  VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
+  VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
+  BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
 
-  BUILTIN_VDQ_I (BINOP, dup_lane, 0)
+  BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
+
+  VAR1 (REINTERP_US, reinterpretdi, 0, df)
+  VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
+  VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
+  VAR1 (REINTERP_US, reinterpretv2si, 0, df)
+  VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
+
+  BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
+
+  VAR1 (REINTERP_PS, reinterpretdi, 0, df)
+  VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
+  VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
+  VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
+  VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
+
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
   BUILTIN_VSDQ_I (BINOP, sqshl, 0)
-  BUILTIN_VSDQ_I (BINOP, uqshl, 0)
+  BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0)
   BUILTIN_VSDQ_I (BINOP, sqrshl, 0)
-  BUILTIN_VSDQ_I (BINOP, uqrshl, 0)
+  BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0)
   /* Implemented by aarch64_<su_optab><optab><mode>.  */
   BUILTIN_VSDQ_I (BINOP, sqadd, 0)
-  BUILTIN_VSDQ_I (BINOP, uqadd, 0)
+  BUILTIN_VSDQ_I (BINOPU, uqadd, 0)
   BUILTIN_VSDQ_I (BINOP, sqsub, 0)
-  BUILTIN_VSDQ_I (BINOP, uqsub, 0)
+  BUILTIN_VSDQ_I (BINOPU, uqsub, 0)
   /* Implemented by aarch64_<sur>qadd<mode>.  */
-  BUILTIN_VSDQ_I (BINOP, suqadd, 0)
-  BUILTIN_VSDQ_I (BINOP, usqadd, 0)
+  BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0)
+  BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0)
 
   /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>.  */
   BUILTIN_VDC (GETLANE, get_dregoi, 0)
@@ -98,6 +107,14 @@
   BUILTIN_VQ (LOADSTRUCT, ld2, 0)
   BUILTIN_VQ (LOADSTRUCT, ld3, 0)
   BUILTIN_VQ (LOADSTRUCT, ld4, 0)
+  /* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>.  */
+  BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0)
+  BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0)
+  BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0)
+  /* Implemented by aarch64_ld<VSTRUCT:nregs>_lane<VQ:mode>.  */
+  BUILTIN_VQ (LOADSTRUCT_LANE, ld2_lane, 0)
+  BUILTIN_VQ (LOADSTRUCT_LANE, ld3_lane, 0)
+  BUILTIN_VQ (LOADSTRUCT_LANE, ld4_lane, 0)
   /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>.  */
   BUILTIN_VDC (STORESTRUCT, st2, 0)
   BUILTIN_VDC (STORESTRUCT, st3, 0)
@@ -107,6 +124,10 @@
   BUILTIN_VQ (STORESTRUCT, st3, 0)
   BUILTIN_VQ (STORESTRUCT, st4, 0)
 
+  BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0)
+  BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0)
+  BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0)
+
   BUILTIN_VQW (BINOP, saddl2, 0)
   BUILTIN_VQW (BINOP, uaddl2, 0)
   BUILTIN_VQW (BINOP, ssubl2, 0)
@@ -142,19 +163,19 @@
   BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
   BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
   /* Implemented by aarch64_s<optab><mode>.  */
-  BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
-  BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
+  BUILTIN_VSDQ_I (UNOP, sqabs, 0)
+  BUILTIN_VSDQ_I (UNOP, sqneg, 0)
 
-  BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
-  BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
-  BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0)
-  BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0)
+  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_lane, 0)
+  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_lane, 0)
+  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_laneq, 0)
+  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_laneq, 0)
   BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0)
   BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0)
-  BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0)
-  BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0)
-  BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0)
-  BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0)
+  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_lane, 0)
+  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_lane, 0)
+  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_laneq, 0)
+  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_laneq, 0)
   BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0)
   BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0)
   /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>.  */
@@ -166,7 +187,7 @@
 
   BUILTIN_VSD_HSI (BINOP, sqdmull, 0)
   BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0)
-  BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0)
+  BUILTIN_VSD_HSI (TERNOP, sqdmull_laneq, 0)
   BUILTIN_VD_HSI (BINOP, sqdmull_n, 0)
   BUILTIN_VQ_HSI (BINOP, sqdmull2, 0)
   BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0)
@@ -186,9 +207,9 @@
   BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
   /* Implemented by aarch64_<sur>shl<mode>.  */
   BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
-  BUILTIN_VSDQ_I_DI (BINOP, ushl, 0)
+  BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0)
   BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
-  BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
+  BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0)
 
   BUILTIN_VDQ_I (SHIFTIMM, ashr, 3)
   VAR1 (SHIFTIMM, ashr_simd, 0, di)
@@ -196,15 +217,15 @@
   VAR1 (USHIFTIMM, lshr_simd, 0, di)
   /* Implemented by aarch64_<sur>shr_n<mode>.  */
   BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
-  BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
+  BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0)
   /* Implemented by aarch64_<sur>sra_n<mode>.  */
   BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0)
-  BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0)
+  BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0)
   BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0)
-  BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0)
+  BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0)
   /* Implemented by aarch64_<sur>shll_n<mode>.  */
   BUILTIN_VDW (SHIFTIMM, sshll_n, 0)
-  BUILTIN_VDW (SHIFTIMM, ushll_n, 0)
+  BUILTIN_VDW (USHIFTIMM, ushll_n, 0)
   /* Implemented by aarch64_<sur>shll2_n<mode>.  */
   BUILTIN_VQW (SHIFTIMM, sshll2_n, 0)
   BUILTIN_VQW (SHIFTIMM, ushll2_n, 0)
@@ -212,42 +233,30 @@
   BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0)
   BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0)
   BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0)
+  BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0)
   BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0)
+  BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0)
   /* Implemented by aarch64_<sur>s<lr>i_n<mode>.  */
   BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
-  BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0)
+  BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0)
   BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
-  BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0)
+  BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0)
   /* Implemented by aarch64_<sur>qshl<u>_n<mode>.  */
-  BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0)
+  BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0)
   BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
-  BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
+  BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0)
 
-  /* Implemented by aarch64_cm<cmp><mode>.  */
-  BUILTIN_VALLDI (BINOP, cmeq, 0)
-  BUILTIN_VALLDI (BINOP, cmge, 0)
-  BUILTIN_VALLDI (BINOP, cmgt, 0)
-  BUILTIN_VALLDI (BINOP, cmle, 0)
-  BUILTIN_VALLDI (BINOP, cmlt, 0)
-  /* Implemented by aarch64_cm<cmp><mode>.  */
-  BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
-  BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
-  BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
+  /* Implemented by aarch64_reduc_plus_<mode>.  */
+  BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
 
-  /* Implemented by reduc_<sur>plus_<mode>.  */
-  BUILTIN_VALL (UNOP, reduc_splus_, 10)
-  BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
+  /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar).  */
+  BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
+  BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+  BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
+  BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
+  BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
+  BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
 
-  /* Implemented by reduc_<maxmin_uns>_<mode>.  */
-  BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
-  BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
-  BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10)
-  BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10)
-  BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10)
-  BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10)
-
   /* Implemented by <maxmin><mode>3.
      smax variants map to fmaxnm,
      smax_nan variants map to fmax.  */
@@ -265,7 +274,7 @@
   BUILTIN_VDQF (UNOP, nearbyint, 2)
   BUILTIN_VDQF (UNOP, rint, 2)
   BUILTIN_VDQF (UNOP, round, 2)
-  BUILTIN_VDQF (UNOP, frintn, 2)
+  BUILTIN_VDQF_DF (UNOP, frintn, 2)
 
   /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2.  */
   VAR1 (UNOP, lbtruncv2sf, 2, v2si)
@@ -330,6 +339,10 @@
   VAR1 (UNOP, floatunsv4si, 2, v4sf)
   VAR1 (UNOP, floatunsv2di, 2, v2df)
 
+  VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
+
+  BUILTIN_VB (UNOP, rbit, 0)
+
   /* Implemented by
      aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>.  */
   BUILTIN_VALL (BINOP, zip1, 0)
@@ -393,3 +406,6 @@
   /* Implemented by aarch64_crypto_pmull<mode>.  */
   VAR1 (BINOPP, crypto_pmull, 0, di)
   VAR1 (BINOPP, crypto_pmull, 0, v2di)
+
+  /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin.  */
+  VAR1 (BINOPV, im_lane_bound, 0, si)
--- a/src/gcc/config/aarch64/constraints.md
+++ b/src/gcc/config/aarch64/constraints.md
@@ -21,6 +21,9 @@
 (define_register_constraint "k" "STACK_REG"
   "@internal The stack register.")
 
+(define_register_constraint "Ucs" "CALLER_SAVE_REGS"
+  "@internal The caller save registers.")
+
 (define_register_constraint "w" "FP_REGS"
   "Floating point and SIMD vector registers.")
 
@@ -92,6 +95,10 @@
   (and (match_code "const_int")
        (match_test "(unsigned HOST_WIDE_INT) ival < 64")))
 
+(define_constraint "Usf"
+  "@internal Usf is a symbol reference."
+  (match_code "symbol_ref"))
+
 (define_constraint "UsM"
   "@internal
   A constraint that matches the immediate constant -1."
--- a/src/gcc/config/aarch64/aarch64.c
+++ b/src/gcc/config/aarch64/aarch64.c
@@ -62,7 +62,10 @@
 #include "dwarf2.h"
 #include "cfgloop.h"
 #include "tree-vectorizer.h"
-#include "config/arm/aarch-cost-tables.h"
+#include "aarch64-cost-tables.h"
+#include "dumpfile.h"
+#include "tm-constrs.h"
+#include "sched-int.h"
 
 /* Defined for convenience.  */
 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -136,12 +139,13 @@
 static void aarch64_override_options_after_change (void);
 static bool aarch64_vector_mode_supported_p (enum machine_mode);
 static unsigned bit_count (unsigned HOST_WIDE_INT);
-static bool aarch64_const_vec_all_same_int_p (rtx,
-					      HOST_WIDE_INT, HOST_WIDE_INT);
-
 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 						 const unsigned char *sel);
+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 
+/* Major revision number of the ARM Architecture implemented by the target.  */
+unsigned aarch64_architecture_version;
+
 /* The processor for which instructions should be scheduled.  */
 enum aarch64_processor aarch64_tune = cortexa53;
 
@@ -171,6 +175,15 @@
 #endif
 static const struct cpu_addrcost_table generic_addrcost_table =
 {
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 0),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 0),
+    },
   NAMED_PARAM (pre_modify, 0),
   NAMED_PARAM (post_modify, 0),
   NAMED_PARAM (register_offset, 0),
@@ -181,17 +194,96 @@
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
+static const struct cpu_addrcost_table cortexa57_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 1),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 1),
+    },
+  NAMED_PARAM (pre_modify, 0),
+  NAMED_PARAM (post_modify, 0),
+  NAMED_PARAM (register_offset, 0),
+  NAMED_PARAM (register_extend, 0),
+  NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_addrcost_table xgene1_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 1),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 1),
+    },
+  NAMED_PARAM (pre_modify, 1),
+  NAMED_PARAM (post_modify, 0),
+  NAMED_PARAM (register_offset, 0),
+  NAMED_PARAM (register_extend, 1),
+  NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   NAMED_PARAM (GP2GP, 1),
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  NAMED_PARAM (GP2FP, 5),
+  NAMED_PARAM (FP2GP, 5),
+  NAMED_PARAM (FP2FP, 2)
+};
+
+static const struct cpu_regmove_cost cortexa57_regmove_cost =
+{
+  NAMED_PARAM (GP2GP, 1),
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  NAMED_PARAM (GP2FP, 5),
+  NAMED_PARAM (FP2GP, 5),
+  NAMED_PARAM (FP2FP, 2)
+};
+
+static const struct cpu_regmove_cost cortexa53_regmove_cost =
+{
+  NAMED_PARAM (GP2GP, 1),
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  NAMED_PARAM (GP2FP, 5),
+  NAMED_PARAM (FP2GP, 5),
+  NAMED_PARAM (FP2FP, 2)
+};
+
+static const struct cpu_regmove_cost thunderx_regmove_cost =
+{
+  NAMED_PARAM (GP2GP, 2),
   NAMED_PARAM (GP2FP, 2),
-  NAMED_PARAM (FP2GP, 2),
-  /* We currently do not provide direct support for TFmode Q->Q move.
-     Therefore we need to raise the cost above 2 in order to have
-     reload handle the situation.  */
+  NAMED_PARAM (FP2GP, 6),
   NAMED_PARAM (FP2FP, 4)
 };
 
+static const struct cpu_regmove_cost xgene1_regmove_cost =
+{
+  NAMED_PARAM (GP2GP, 1),
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+     their cost higher than memmov_cost.  */
+  NAMED_PARAM (GP2FP, 8),
+  NAMED_PARAM (FP2GP, 8),
+  NAMED_PARAM (FP2FP, 2)
+};
+
 /* Generic costs for vector insn classes.  */
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -212,9 +304,56 @@
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+/* Generic costs for vector insn classes.  */
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
+static const struct cpu_vector_cost cortexa57_vector_cost =
+{
+  NAMED_PARAM (scalar_stmt_cost, 1),
+  NAMED_PARAM (scalar_load_cost, 4),
+  NAMED_PARAM (scalar_store_cost, 1),
+  NAMED_PARAM (vec_stmt_cost, 3),
+  NAMED_PARAM (vec_to_scalar_cost, 8),
+  NAMED_PARAM (scalar_to_vec_cost, 8),
+  NAMED_PARAM (vec_align_load_cost, 5),
+  NAMED_PARAM (vec_unalign_load_cost, 5),
+  NAMED_PARAM (vec_unalign_store_cost, 1),
+  NAMED_PARAM (vec_store_cost, 1),
+  NAMED_PARAM (cond_taken_branch_cost, 1),
+  NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
+/* Generic costs for vector insn classes.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost xgene1_vector_cost =
+{
+  NAMED_PARAM (scalar_stmt_cost, 1),
+  NAMED_PARAM (scalar_load_cost, 5),
+  NAMED_PARAM (scalar_store_cost, 1),
+  NAMED_PARAM (vec_stmt_cost, 2),
+  NAMED_PARAM (vec_to_scalar_cost, 4),
+  NAMED_PARAM (scalar_to_vec_cost, 4),
+  NAMED_PARAM (vec_align_load_cost, 10),
+  NAMED_PARAM (vec_unalign_load_cost, 10),
+  NAMED_PARAM (vec_unalign_store_cost, 2),
+  NAMED_PARAM (vec_store_cost, 2),
+  NAMED_PARAM (cond_taken_branch_cost, 2),
+  NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
+#define AARCH64_FUSE_NOTHING	(0)
+#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
+#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
+#define AARCH64_FUSE_ADRP_LDR	(1 << 3)
+#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
@@ -222,7 +361,11 @@
   &generic_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1	/* vec_reassoc_width.  */
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -229,22 +372,59 @@
 {
   &cortexa53_extra_costs,
   &generic_addrcost_table,
-  &generic_regmove_cost,
+  &cortexa53_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+                             | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1	/* vec_reassoc_width.  */
 };
 
 static const struct tune_params cortexa57_tunings =
 {
   &cortexa57_extra_costs,
+  &cortexa57_addrcost_table,
+  &cortexa57_regmove_cost,
+  &cortexa57_vector_cost,
+  NAMED_PARAM (memmov_cost, 4),
+  NAMED_PARAM (issue_rate, 3),
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK)),
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1	/* vec_reassoc_width.  */
+};
+
+static const struct tune_params thunderx_tunings =
+{
+  &thunderx_extra_costs,
   &generic_addrcost_table,
-  &generic_regmove_cost,
+  &thunderx_regmove_cost,
   &generic_vector_cost,
-  NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 3)
+  NAMED_PARAM (memmov_cost, 6),
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1	/* vec_reassoc_width.  */
 };
 
+static const struct tune_params xgene1_tunings =
+{
+  &xgene1_extra_costs,
+  &xgene1_addrcost_table,
+  &xgene1_regmove_cost,
+  &xgene1_vector_cost,
+  NAMED_PARAM (memmov_cost, 6),
+  NAMED_PARAM (issue_rate, 4),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1	/* vec_reassoc_width.  */
+};
+
 /* A processor implementing AArch64.  */
 struct processor
 {
@@ -251,6 +431,7 @@
   const char *const name;
   enum aarch64_processor core;
   const char *arch;
+  unsigned architecture_version;
   const unsigned long flags;
   const struct tune_params *const tune;
 };
@@ -258,12 +439,12 @@
 /* Processor cores implementing AArch64.  */
 static const struct processor all_cores[] =
 {
-#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
-  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
+#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
+  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
 #include "aarch64-cores.def"
 #undef AARCH64_CORE
-  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
-  {NULL, aarch64_none, NULL, 0, NULL}
+  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
+  {NULL, aarch64_none, NULL, 0, 0, NULL}
 };
 
 /* Architectures implementing AArch64.  */
@@ -270,10 +451,10 @@
 static const struct processor all_architectures[] =
 {
 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
-  {NAME, CORE, #ARCH, FLAGS, NULL},
+  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
 #include "aarch64-arches.def"
 #undef AARCH64_ARCH
-  {NULL, aarch64_none, NULL, 0, NULL}
+  {NULL, aarch64_none, NULL, 0, 0, NULL}
 };
 
 /* Target specification.  These are populated as commandline arguments
@@ -332,6 +513,25 @@
   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 };
 
+static unsigned int
+aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+  return 2;
+}
+
+static int
+aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
+			     enum machine_mode mode)
+{
+  if (VECTOR_MODE_P (mode))
+    return aarch64_tune_params->vec_reassoc_width;
+  if (INTEGRAL_MODE_P (mode))
+    return aarch64_tune_params->int_reassoc_width;
+  if (FLOAT_MODE_P (mode))
+    return aarch64_tune_params->fp_reassoc_width;
+  return 1;
+}
+
 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
 unsigned
 aarch64_dbx_register_number (unsigned regno)
@@ -424,6 +624,24 @@
   return 0;
 }
 
+/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
+enum machine_mode
+aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
+				     enum machine_mode mode)
+{
+  /* Handle modes that fit within single registers.  */
+  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
+    {
+      if (GET_MODE_SIZE (mode) >= 4)
+        return mode;
+      else
+        return SImode;
+    }
+  /* Fall back to generic for multi-reg and very large modes.  */
+  else
+    return choose_hard_reg_mode (regno, nregs, false);
+}
+
 /* Return true if calls to DECL should be treated as
    long-calls (ie called via a register).  */
 static bool
@@ -444,7 +662,7 @@
    represent an expression that matches an extend operation.  The
    operands represent the paramters from
 
-   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
+   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
 bool
 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
 				rtx extract_imm)
@@ -636,12 +854,24 @@
 
     case SYMBOL_SMALL_TLSDESC:
       {
-	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
+	enum machine_mode mode = GET_MODE (dest);
+	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
 	rtx tp;
 
-	emit_insn (gen_tlsdesc_small (imm));
+	gcc_assert (mode == Pmode || mode == ptr_mode);
+
+	/* In ILP32, the got entry is always of SImode size.  Unlike
+	   small GOT, the dest is fixed at reg 0.  */
+	if (TARGET_ILP32)
+	  emit_insn (gen_tlsdesc_small_si (imm));
+	else
+	  emit_insn (gen_tlsdesc_small_di (imm));
 	tp = aarch64_load_tp (NULL);
-	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
+
+	if (mode != Pmode)
+	  tp = gen_lowpart (mode, tp);
+
+	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 	return;
       }
@@ -648,10 +878,34 @@
 
     case SYMBOL_SMALL_GOTTPREL:
       {
-	rtx tmp_reg = gen_reg_rtx (Pmode);
+	/* In ILP32, the mode of dest can be either SImode or DImode,
+	   while the got entry is always of SImode size.  The mode of
+	   dest depends on how dest is used: if dest is assigned to a
+	   pointer (e.g. in the memory), it has SImode; it may have
+	   DImode if dest is dereferenced to access the memeory.
+	   This is why we have to handle three different tlsie_small
+	   patterns here (two patterns for ILP32).  */
+	enum machine_mode mode = GET_MODE (dest);
+	rtx tmp_reg = gen_reg_rtx (mode);
 	rtx tp = aarch64_load_tp (NULL);
-	emit_insn (gen_tlsie_small (tmp_reg, imm));
-	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
+
+	if (mode == ptr_mode)
+	  {
+	    if (mode == DImode)
+	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
+	    else
+	      {
+		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
+		tp = gen_lowpart (mode, tp);
+	      }
+	  }
+	else
+	  {
+	    gcc_assert (mode == Pmode);
+	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
+	  }
+
+	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
 	return;
       }
@@ -893,10 +1147,10 @@
   return plus_constant (mode, reg, offset);
 }
 
-void
-aarch64_expand_mov_immediate (rtx dest, rtx imm)
+static int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+				machine_mode mode)
 {
-  enum machine_mode mode = GET_MODE (dest);
   unsigned HOST_WIDE_INT mask;
   int i;
   bool first;
@@ -903,86 +1157,15 @@
   unsigned HOST_WIDE_INT val;
   bool subtargets;
   rtx subtarget;
-  int one_match, zero_match;
+  int one_match, zero_match, first_not_ffff_match;
+  int num_insns = 0;
 
-  gcc_assert (mode == SImode || mode == DImode);
-
-  /* Check on what type of symbol it is.  */
-  if (GET_CODE (imm) == SYMBOL_REF
-      || GET_CODE (imm) == LABEL_REF
-      || GET_CODE (imm) == CONST)
-    {
-      rtx mem, base, offset;
-      enum aarch64_symbol_type sty;
-
-      /* If we have (const (plus symbol offset)), separate out the offset
-	 before we start classifying the symbol.  */
-      split_const (imm, &base, &offset);
-
-      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
-      switch (sty)
-	{
-	case SYMBOL_FORCE_TO_MEM:
-	  if (offset != const0_rtx
-	      && targetm.cannot_force_const_mem (mode, imm))
-	    {
-	      gcc_assert (can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  mem = force_const_mem (ptr_mode, imm);
-	  gcc_assert (mem);
-	  if (mode != ptr_mode)
-	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	  return;
-
-        case SYMBOL_SMALL_TLSGD:
-        case SYMBOL_SMALL_TLSDESC:
-        case SYMBOL_SMALL_GOTTPREL:
-	case SYMBOL_SMALL_GOT:
-	case SYMBOL_TINY_GOT:
-	  if (offset != const0_rtx)
-	    {
-	      gcc_assert(can_create_pseudo_p ());
-	      base = aarch64_force_temporary (mode, dest, base);
-	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-	      aarch64_emit_move (dest, base);
-	      return;
-	    }
-	  /* FALLTHRU */
-
-        case SYMBOL_SMALL_TPREL:
-	case SYMBOL_SMALL_ABSOLUTE:
-	case SYMBOL_TINY_ABSOLUTE:
-	  aarch64_load_symref_appropriately (dest, imm, sty);
-	  return;
-
-	default:
-	  gcc_unreachable ();
-	}
-    }
-
   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
     {
-      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      return;
-    }
-
-  if (!CONST_INT_P (imm))
-    {
-      if (GET_CODE (imm) == HIGH)
+      if (generate)
 	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-      else
-        {
-	  rtx mem = force_const_mem (mode, imm);
-	  gcc_assert (mem);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-	}
-
-      return;
+      num_insns++;
+      return num_insns;
     }
 
   if (mode == SImode)
@@ -990,10 +1173,15 @@
       /* We know we can't do this in 1 insn, and we must be able to do it
 	 in two; so don't mess around looking for sequences that don't buy
 	 us anything.  */
-      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
-      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-      return;
+      if (generate)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, dest,
+				  GEN_INT (INTVAL (imm) & 0xffff)));
+	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+	}
+      num_insns += 2;
+      return num_insns;
     }
 
   /* Remaining cases are all for DImode.  */
@@ -1004,29 +1192,34 @@
   one_match = 0;
   zero_match = 0;
   mask = 0xffff;
+  first_not_ffff_match = -1;
 
   for (i = 0; i < 64; i += 16, mask <<= 16)
     {
-      if ((val & mask) == 0)
-	zero_match++;
-      else if ((val & mask) == mask)
+      if ((val & mask) == mask)
 	one_match++;
+      else
+	{
+	  if (first_not_ffff_match < 0)
+	    first_not_ffff_match = i;
+	  if ((val & mask) == 0)
+	    zero_match++;
+	}
     }
 
   if (one_match == 2)
     {
-      mask = 0xffff;
-      for (i = 0; i < 64; i += 16, mask <<= 16)
+      /* Set one of the quarters and then insert back into result.  */
+      mask = 0xffffll << first_not_ffff_match;
+      if (generate)
 	{
-	  if ((val & mask) != mask)
-	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-					 GEN_INT ((val >> i) & 0xffff)));
-	      return;
-	    }
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
+	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
+				     GEN_INT ((val >> first_not_ffff_match)
+					      & 0xffff)));
 	}
-      gcc_unreachable ();
+      num_insns += 2;
+      return num_insns;
     }
 
   if (zero_match == 2)
@@ -1039,42 +1232,55 @@
 
       if (aarch64_uimm12_shift (val - (val & mask)))
 	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val & mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
 	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val + comp) & mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val + comp) & mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val + comp) & mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val + comp) & mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
 	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT ((val - comp) | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - ((val - comp) | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT ((val - comp) | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - ((val - comp) | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
 	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (val | ~mask)));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - (val | ~mask))));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (val | ~mask)));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - (val | ~mask))));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
     }
 
@@ -1088,12 +1294,16 @@
       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
 	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
 	{
-	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-				  GEN_INT (aarch64_bitmasks[i])));
-	  emit_insn (gen_adddi3 (dest, subtarget,
-				 GEN_INT (val - aarch64_bitmasks[i])));
-	  return;
+	  if (generate)
+	    {
+	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
+	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+				      GEN_INT (aarch64_bitmasks[i])));
+	      emit_insn (gen_adddi3 (dest, subtarget,
+				     GEN_INT (val - aarch64_bitmasks[i])));
+	    }
+	  num_insns += 2;
+	  return num_insns;
 	}
 
       for (j = 0; j < 64; j += 16, mask <<= 16)
@@ -1100,11 +1310,15 @@
 	{
 	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
 	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					 GEN_INT ((val >> j) & 0xffff)));
-	      return;
+	      if (generate)
+		{
+		  emit_insn (gen_rtx_SET (VOIDmode, dest,
+					  GEN_INT (aarch64_bitmasks[i])));
+		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
+					     GEN_INT ((val >> j) & 0xffff)));
+		}
+	      num_insns += 2;
+	      return num_insns;
 	    }
 	}
     }
@@ -1119,12 +1333,16 @@
 	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
 	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
 	      {
-		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[i])));
-		emit_insn (gen_iordi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[j])));
-		return;
+		if (generate)
+		  {
+		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[i])));
+		    emit_insn (gen_iordi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[j])));
+		  }
+		num_insns += 2;
+		return num_insns;
 	      }
 	}
       else if ((val & aarch64_bitmasks[i]) == val)
@@ -1134,17 +1352,44 @@
 	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
 	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
 	      {
-
-		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-					GEN_INT (aarch64_bitmasks[j])));
-		emit_insn (gen_anddi3 (dest, subtarget,
-				       GEN_INT (aarch64_bitmasks[i])));
-		return;
+		if (generate)
+		  {
+		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
+		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
+					    GEN_INT (aarch64_bitmasks[j])));
+		    emit_insn (gen_anddi3 (dest, subtarget,
+					   GEN_INT (aarch64_bitmasks[i])));
+		  }
+		num_insns += 2;
+		return num_insns;
 	      }
 	}
     }
 
+  if (one_match > zero_match)
+    {
+      /* Set either first three quarters or all but the third.	 */
+      mask = 0xffffll << (16 - first_not_ffff_match);
+      if (generate)
+	emit_insn (gen_rtx_SET (VOIDmode, dest,
+				GEN_INT (val | mask | 0xffffffff00000000ull)));
+      num_insns ++;
+
+      /* Now insert other two quarters.	 */
+      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
+	   i < 64; i += 16, mask <<= 16)
+	{
+	  if ((val & mask) != mask)
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
+	}
+      return num_insns;
+    }
+
  simple_sequence:
   first = true;
   mask = 0xffff;
@@ -1154,30 +1399,113 @@
 	{
 	  if (first)
 	    {
-	      emit_insn (gen_rtx_SET (VOIDmode, dest,
-				      GEN_INT (val & mask)));
+	      if (generate)
+		emit_insn (gen_rtx_SET (VOIDmode, dest,
+					GEN_INT (val & mask)));
+	      num_insns ++;
 	      first = false;
 	    }
 	  else
-	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-				       GEN_INT ((val >> i) & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					   GEN_INT ((val >> i) & 0xffff)));
+	      num_insns ++;
+	    }
 	}
     }
+
+  return num_insns;
 }
 
-static bool
-aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+
+void
+aarch64_expand_mov_immediate (rtx dest, rtx imm)
 {
-  /* Indirect calls are not currently supported.  */
-  if (decl == NULL)
-    return false;
+  machine_mode mode = GET_MODE (dest);
 
-  /* Cannot tail-call to long-calls, since these are outside of the
-     range of a branch instruction (we could handle this if we added
-     support for indirect tail-calls.  */
-  if (aarch64_decl_is_long_call_p (decl))
-    return false;
+  gcc_assert (mode == SImode || mode == DImode);
 
+  /* Check on what type of symbol it is.  */
+  if (GET_CODE (imm) == SYMBOL_REF
+      || GET_CODE (imm) == LABEL_REF
+      || GET_CODE (imm) == CONST)
+    {
+      rtx mem, base, offset;
+      enum aarch64_symbol_type sty;
+
+      /* If we have (const (plus symbol offset)), separate out the offset
+	 before we start classifying the symbol.  */
+      split_const (imm, &base, &offset);
+
+      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
+      switch (sty)
+	{
+	case SYMBOL_FORCE_TO_MEM:
+	  if (offset != const0_rtx
+	      && targetm.cannot_force_const_mem (mode, imm))
+	    {
+	      gcc_assert (can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  mem = force_const_mem (ptr_mode, imm);
+	  gcc_assert (mem);
+	  if (mode != ptr_mode)
+	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	  return;
+
+        case SYMBOL_SMALL_TLSGD:
+        case SYMBOL_SMALL_TLSDESC:
+        case SYMBOL_SMALL_GOTTPREL:
+	case SYMBOL_SMALL_GOT:
+	case SYMBOL_TINY_GOT:
+	  if (offset != const0_rtx)
+	    {
+	      gcc_assert(can_create_pseudo_p ());
+	      base = aarch64_force_temporary (mode, dest, base);
+	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
+	      aarch64_emit_move (dest, base);
+	      return;
+	    }
+	  /* FALLTHRU */
+
+        case SYMBOL_SMALL_TPREL:
+	case SYMBOL_SMALL_ABSOLUTE:
+	case SYMBOL_TINY_ABSOLUTE:
+	  aarch64_load_symref_appropriately (dest, imm, sty);
+	  return;
+
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  if (!CONST_INT_P (imm))
+    {
+      if (GET_CODE (imm) == HIGH)
+	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
+      else
+        {
+	  rtx mem = force_const_mem (mode, imm);
+	  gcc_assert (mem);
+	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
+	}
+
+      return;
+    }
+
+  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
+}
+
+static bool
+aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+				 tree exp ATTRIBUTE_UNUSED)
+{
+  /* Currently, always true.  */
   return true;
 }
 
@@ -1692,11 +2020,6 @@
 static bool
 aarch64_frame_pointer_required (void)
 {
-  /* If the function contains dynamic stack allocations, we need to
-     use the frame pointer to access the static parts of the frame.  */
-  if (cfun->calls_alloca)
-    return true;
-
   /* In aarch64_override_options_after_change
      flag_omit_leaf_frame_pointer turns off the frame pointer by
      default.  Turn it back on now if we've not got a leaf
@@ -1720,268 +2043,313 @@
   if (reload_completed && cfun->machine->frame.laid_out)
     return;
 
-  cfun->machine->frame.fp_lr_offset = 0;
+#define SLOT_NOT_REQUIRED (-2)
+#define SLOT_REQUIRED     (-1)
 
+  cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
+  cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
+
   /* First mark all the registers that really need to be saved...  */
   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
-    cfun->machine->frame.reg_offset[regno] = -1;
+    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
 
   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
-    cfun->machine->frame.reg_offset[regno] = -1;
+    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
 
   /* ... that includes the eh data registers (if needed)...  */
   if (crtl->calls_eh_return)
     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
-      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
+      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
+	= SLOT_REQUIRED;
 
   /* ... and any callee saved register that dataflow says is live.  */
   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
     if (df_regs_ever_live_p (regno)
-	&& !call_used_regs[regno])
-      cfun->machine->frame.reg_offset[regno] = 0;
+	&& (regno == R30_REGNUM
+	    || !call_used_regs[regno]))
+      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
 
   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
     if (df_regs_ever_live_p (regno)
 	&& !call_used_regs[regno])
-      cfun->machine->frame.reg_offset[regno] = 0;
+      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
 
   if (frame_pointer_needed)
     {
-      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
+      /* FP and LR are placed in the linkage record.  */
       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
+      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
+      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
+      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
+      offset += 2 * UNITS_PER_WORD;
     }
 
   /* Now assign stack slots for them.  */
-  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
-    if (cfun->machine->frame.reg_offset[regno] != -1)
+  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
+    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
       {
 	cfun->machine->frame.reg_offset[regno] = offset;
+	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
+	  cfun->machine->frame.wb_candidate1 = regno;
+	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
+	  cfun->machine->frame.wb_candidate2 = regno;
 	offset += UNITS_PER_WORD;
       }
 
   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
-    if (cfun->machine->frame.reg_offset[regno] != -1)
+    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
       {
 	cfun->machine->frame.reg_offset[regno] = offset;
+	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
+	  cfun->machine->frame.wb_candidate1 = regno;
+	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
+		 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
+	  cfun->machine->frame.wb_candidate2 = regno;
 	offset += UNITS_PER_WORD;
       }
 
-  if (frame_pointer_needed)
-    {
-      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
-      offset += UNITS_PER_WORD;
-      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
-    }
-
-  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
-    {
-      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
-      offset += UNITS_PER_WORD;
-      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
-    }
-
   cfun->machine->frame.padding0 =
     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
 
   cfun->machine->frame.saved_regs_size = offset;
+
+  cfun->machine->frame.hard_fp_offset
+    = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
+			+ get_frame_size ()
+			+ cfun->machine->frame.saved_regs_size,
+			STACK_BOUNDARY / BITS_PER_UNIT);
+
+  cfun->machine->frame.frame_size
+    = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
+			+ crtl->outgoing_args_size,
+			STACK_BOUNDARY / BITS_PER_UNIT);
+
   cfun->machine->frame.laid_out = true;
 }
 
-/* Make the last instruction frame-related and note that it performs
-   the operation described by FRAME_PATTERN.  */
+static bool
+aarch64_register_saved_on_entry (int regno)
+{
+  return cfun->machine->frame.reg_offset[regno] >= 0;
+}
 
+static unsigned
+aarch64_next_callee_save (unsigned regno, unsigned limit)
+{
+  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
+    regno ++;
+  return regno;
+}
+
 static void
-aarch64_set_frame_expr (rtx frame_pattern)
+aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
+			   HOST_WIDE_INT adjustment)
+ {
+  rtx base_rtx = stack_pointer_rtx;
+  rtx insn, reg, mem;
+
+  reg = gen_rtx_REG (mode, regno);
+  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
+			    plus_constant (Pmode, base_rtx, -adjustment));
+  mem = gen_rtx_MEM (mode, mem);
+
+  insn = emit_move_insn (mem, reg);
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+static rtx
+aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
+			  HOST_WIDE_INT adjustment)
 {
+  switch (mode)
+    {
+    case DImode:
+      return gen_storewb_pairdi_di (base, base, reg, reg2,
+				    GEN_INT (-adjustment),
+				    GEN_INT (UNITS_PER_WORD - adjustment));
+    case DFmode:
+      return gen_storewb_pairdf_di (base, base, reg, reg2,
+				    GEN_INT (-adjustment),
+				    GEN_INT (UNITS_PER_WORD - adjustment));
+    default:
+      gcc_unreachable ();
+    }
+}
+
+static void
+aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
+			 unsigned regno2, HOST_WIDE_INT adjustment)
+{
   rtx insn;
+  rtx reg1 = gen_rtx_REG (mode, regno1);
+  rtx reg2 = gen_rtx_REG (mode, regno2);
 
-  insn = get_last_insn ();
+  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
+					      reg2, adjustment));
+  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
+
+  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
   RTX_FRAME_RELATED_P (insn) = 1;
-  RTX_FRAME_RELATED_P (frame_pattern) = 1;
-  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
-				      frame_pattern,
-				      REG_NOTES (insn));
 }
 
-static bool
-aarch64_register_saved_on_entry (int regno)
+static rtx
+aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
+			 HOST_WIDE_INT adjustment)
 {
-  return cfun->machine->frame.reg_offset[regno] != -1;
+  switch (mode)
+    {
+    case DImode:
+      return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
+				   GEN_INT (UNITS_PER_WORD));
+    case DFmode:
+      return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
+				   GEN_INT (UNITS_PER_WORD));
+    default:
+      gcc_unreachable ();
+    }
 }
 
+static rtx
+aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
+			rtx reg2)
+{
+  switch (mode)
+    {
+    case DImode:
+      return gen_store_pairdi (mem1, reg1, mem2, reg2);
 
-static void
-aarch64_save_or_restore_fprs (int start_offset, int increment,
-			      bool restore, rtx base_rtx)
+    case DFmode:
+      return gen_store_pairdf (mem1, reg1, mem2, reg2);
 
+    default:
+      gcc_unreachable ();
+    }
+}
+
+static rtx
+aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
+		       rtx mem2)
 {
+  switch (mode)
+    {
+    case DImode:
+      return gen_load_pairdi (reg1, mem1, reg2, mem2);
+
+    case DFmode:
+      return gen_load_pairdf (reg1, mem1, reg2, mem2);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+
+static void
+aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
+			   unsigned start, unsigned limit, bool skip_wb)
+{
+  rtx insn;
+  rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
+						 ? gen_frame_mem : gen_rtx_MEM);
   unsigned regno;
   unsigned regno2;
-  rtx insn;
-  rtx (*gen_mem_ref)(enum machine_mode, rtx)
-    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
 
-
-  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
+  for (regno = aarch64_next_callee_save (start, limit);
+       regno <= limit;
+       regno = aarch64_next_callee_save (regno + 1, limit))
     {
-      if (aarch64_register_saved_on_entry (regno))
-	{
-	  rtx mem;
-	  mem = gen_mem_ref (DFmode,
-			     plus_constant (Pmode,
-					    base_rtx,
-					    start_offset));
+      rtx reg, mem;
+      HOST_WIDE_INT offset;
 
-	  for (regno2 = regno + 1;
-	       regno2 <= V31_REGNUM
-		 && !aarch64_register_saved_on_entry (regno2);
-	       regno2++)
-	    {
-	      /* Empty loop.  */
-	    }
-	  if (regno2 <= V31_REGNUM &&
-	      aarch64_register_saved_on_entry (regno2))
-	    {
-	      rtx mem2;
-	      /* Next highest register to be saved.  */
-	      mem2 = gen_mem_ref (DFmode,
-				  plus_constant
-				  (Pmode,
-				   base_rtx,
-				   start_offset + increment));
-	      if (restore == false)
-		{
-		  insn = emit_insn
-		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
-					mem2, gen_rtx_REG (DFmode, regno2)));
+      if (skip_wb
+	  && (regno == cfun->machine->frame.wb_candidate1
+	      || regno == cfun->machine->frame.wb_candidate2))
+	continue;
 
-		}
-	      else
-		{
-		  insn = emit_insn
-		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
-				       gen_rtx_REG (DFmode, regno2), mem2));
+      reg = gen_rtx_REG (mode, regno);
+      offset = start_offset + cfun->machine->frame.reg_offset[regno];
+      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
+					      offset));
 
-		  add_reg_note (insn, REG_CFA_RESTORE,
-				gen_rtx_REG (DFmode, regno));
-		  add_reg_note (insn, REG_CFA_RESTORE,
-				gen_rtx_REG (DFmode, regno2));
-		}
+      regno2 = aarch64_next_callee_save (regno + 1, limit);
 
-		  /* The first part of a frame-related parallel insn
-		     is always assumed to be relevant to the frame
-		     calculations; subsequent parts, are only
-		     frame-related if explicitly marked.  */
-	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-	      regno = regno2;
-	      start_offset += increment * 2;
-	    }
-	  else
-	    {
-	      if (restore == false)
-		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
-	      else
-		{
-		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
-		  add_reg_note (insn, REG_CFA_RESTORE,
-				gen_rtx_REG (DImode, regno));
-		}
-	      start_offset += increment;
-	    }
-	  RTX_FRAME_RELATED_P (insn) = 1;
+      if (regno2 <= limit
+	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
+	      == cfun->machine->frame.reg_offset[regno2]))
+
+	{
+	  rtx reg2 = gen_rtx_REG (mode, regno2);
+	  rtx mem2;
+
+	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
+	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
+						   offset));
+	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
+						    reg2));
+
+	  /* The first part of a frame-related parallel insn is
+	     always assumed to be relevant to the frame
+	     calculations; subsequent parts, are only
+	     frame-related if explicitly marked.  */
+	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
+	  regno = regno2;
 	}
+      else
+	insn = emit_move_insn (mem, reg);
+
+      RTX_FRAME_RELATED_P (insn) = 1;
     }
-
 }
 
-
-/* offset from the stack pointer of where the saves and
-   restore's have to happen.  */
 static void
-aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
-					    bool restore)
+aarch64_restore_callee_saves (enum machine_mode mode,
+			      HOST_WIDE_INT start_offset, unsigned start,
+			      unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
-  rtx insn;
   rtx base_rtx = stack_pointer_rtx;
-  HOST_WIDE_INT start_offset = offset;
-  HOST_WIDE_INT increment = UNITS_PER_WORD;
-  rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
-  unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
+  rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
+						 ? gen_frame_mem : gen_rtx_MEM);
   unsigned regno;
   unsigned regno2;
+  HOST_WIDE_INT offset;
 
-  for (regno = R0_REGNUM; regno <= limit; regno++)
+  for (regno = aarch64_next_callee_save (start, limit);
+       regno <= limit;
+       regno = aarch64_next_callee_save (regno + 1, limit))
     {
-      if (aarch64_register_saved_on_entry (regno))
-	{
-	  rtx mem;
-	  mem = gen_mem_ref (Pmode,
-			     plus_constant (Pmode,
-					    base_rtx,
-					    start_offset));
+      rtx reg, mem;
 
-	  for (regno2 = regno + 1;
-	       regno2 <= limit
-		 && !aarch64_register_saved_on_entry (regno2);
-	       regno2++)
-	    {
-	      /* Empty loop.  */
-	    }
-	  if (regno2 <= limit &&
-	      aarch64_register_saved_on_entry (regno2))
-	    {
-	      rtx mem2;
-	      /* Next highest register to be saved.  */
-	      mem2 = gen_mem_ref (Pmode,
-				  plus_constant
-				  (Pmode,
-				   base_rtx,
-				   start_offset + increment));
-	      if (restore == false)
-		{
-		  insn = emit_insn
-		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
-					mem2, gen_rtx_REG (DImode, regno2)));
+      if (skip_wb
+	  && (regno == cfun->machine->frame.wb_candidate1
+	      || regno == cfun->machine->frame.wb_candidate2))
+	continue;
 
-		}
-	      else
-		{
-		  insn = emit_insn
-		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
-				     gen_rtx_REG (DImode, regno2), mem2));
+      reg = gen_rtx_REG (mode, regno);
+      offset = start_offset + cfun->machine->frame.reg_offset[regno];
+      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
 
-		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
-		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
-		}
+      regno2 = aarch64_next_callee_save (regno + 1, limit);
 
-		  /* The first part of a frame-related parallel insn
-		     is always assumed to be relevant to the frame
-		     calculations; subsequent parts, are only
-		     frame-related if explicitly marked.  */
-	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
-					    1)) = 1;
-	      regno = regno2;
-	      start_offset += increment * 2;
-	    }
-	  else
-	    {
-	      if (restore == false)
-		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
-	      else
-		{
-		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
-		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
-		}
-	      start_offset += increment;
-	    }
-	  RTX_FRAME_RELATED_P (insn) = 1;
+      if (regno2 <= limit
+	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
+	      == cfun->machine->frame.reg_offset[regno2]))
+	{
+	  rtx reg2 = gen_rtx_REG (mode, regno2);
+	  rtx mem2;
+
+	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
+	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
+
+	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
+	  regno = regno2;
 	}
+      else
+	emit_move_insn (reg, mem);
+      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
     }
-
-  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
-
 }
 
 /* AArch64 stack frames generated by this compiler look like:
@@ -1990,37 +2358,35 @@
 	|                               |
 	|  incoming stack arguments     |
 	|                               |
-	+-------------------------------+ <-- arg_pointer_rtx
-	|                               |
+	+-------------------------------+
+	|                               | <-- incoming stack pointer (aligned)
 	|  callee-allocated save area   |
 	|  for register varargs         |
 	|                               |
-	+-------------------------------+ <-- frame_pointer_rtx
+	+-------------------------------+
+	|  local variables              | <-- frame_pointer_rtx
 	|                               |
-	|  local variables              |
-	|                               |
 	+-------------------------------+
 	|  padding0                     | \
 	+-------------------------------+  |
-	|                               |  |
-	|                               |  |
 	|  callee-saved registers       |  | frame.saved_regs_size
-	|                               |  |
 	+-------------------------------+  |
 	|  LR'                          |  |
 	+-------------------------------+  |
-	|  FP'                          | /
-      P +-------------------------------+ <-- hard_frame_pointer_rtx
+	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
+        +-------------------------------+
 	|  dynamic allocation           |
 	+-------------------------------+
-	|                               |
-	|  outgoing stack arguments     |
-	|                               |
-	+-------------------------------+ <-- stack_pointer_rtx
+	|  padding                      |
+	+-------------------------------+
+	|  outgoing stack arguments     | <-- arg_pointer
+        |                               |
+	+-------------------------------+
+	|                               | <-- stack_pointer_rtx (aligned)
 
-   Dynamic stack allocations such as alloca insert data at point P.
-   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
-   hard_frame_pointer_rtx unchanged.  */
+   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
+   but leave frame_pointer_rtx and hard_frame_pointer_rtx
+   unchanged.  */
 
 /* Generate the prologue instructions for entry into a function.
    Establish the stack frame by decreasing the stack pointer with a
@@ -2038,27 +2404,20 @@
 
      sub sp, sp, <final_adjustment_if_any>
   */
-  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
   HOST_WIDE_INT frame_size, offset;
-  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
+  HOST_WIDE_INT fp_offset;		/* Offset from hard FP to SP.  */
+  HOST_WIDE_INT hard_fp_offset;
   rtx insn;
 
   aarch64_layout_frame ();
-  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
-  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
-	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
-  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
-		+ crtl->outgoing_args_size);
-  offset = frame_size = AARCH64_ROUND_UP (frame_size,
-					  STACK_BOUNDARY / BITS_PER_UNIT);
 
+  offset = frame_size = cfun->machine->frame.frame_size;
+  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
+  fp_offset = frame_size - hard_fp_offset;
+
   if (flag_stack_usage_info)
     current_function_static_stack_size = frame_size;
 
-  fp_offset = (offset
-	       - original_frame_size
-	       - cfun->machine->frame.saved_regs_size);
-
   /* Store pairs and load pairs have a range only -512 to 504.  */
   if (offset >= 512)
     {
@@ -2068,7 +2427,7 @@
 	 register area.  This will allow the pre-index write-back
 	 store pair instructions to be used for setting up the stack frame
 	 efficiently.  */
-      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
+      offset = hard_fp_offset;
       if (offset >= 512)
 	offset = cfun->machine->frame.saved_regs_size;
 
@@ -2079,29 +2438,29 @@
 	{
 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
 	  emit_move_insn (op0, GEN_INT (-frame_size));
-	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
-	  aarch64_set_frame_expr (gen_rtx_SET
-				  (Pmode, stack_pointer_rtx,
-				   plus_constant (Pmode,
-						  stack_pointer_rtx,
-						  -frame_size)));
+	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
+
+	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
+			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+				     plus_constant (Pmode, stack_pointer_rtx,
+						    -frame_size)));
+	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
       else if (frame_size > 0)
 	{
-	  if ((frame_size & 0xfff) != frame_size)
+	  int hi_ofs = frame_size & 0xfff000;
+	  int lo_ofs = frame_size & 0x000fff;
+
+	  if (hi_ofs)
 	    {
 	      insn = emit_insn (gen_add2_insn
-				(stack_pointer_rtx,
-				 GEN_INT (-(frame_size
-					    & ~(HOST_WIDE_INT)0xfff))));
+				(stack_pointer_rtx, GEN_INT (-hi_ofs)));
 	      RTX_FRAME_RELATED_P (insn) = 1;
 	    }
-	  if ((frame_size & 0xfff) != 0)
+	  if (lo_ofs)
 	    {
 	      insn = emit_insn (gen_add2_insn
-				(stack_pointer_rtx,
-				 GEN_INT (-(frame_size
-					    & (HOST_WIDE_INT)0xfff))));
+				(stack_pointer_rtx, GEN_INT (-lo_ofs)));
 	      RTX_FRAME_RELATED_P (insn) = 1;
 	    }
 	}
@@ -2111,12 +2470,11 @@
 
   if (offset > 0)
     {
-      /* Save the frame pointer and lr if the frame pointer is needed
-	 first.  Make the frame pointer point to the location of the
-	 old frame pointer on the stack.  */
+      bool skip_wb = false;
+
       if (frame_pointer_needed)
 	{
-	  rtx mem_fp, mem_lr;
+	  skip_wb = true;
 
 	  if (fp_offset)
 	    {
@@ -2123,67 +2481,52 @@
 	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
 					       GEN_INT (-offset)));
 	      RTX_FRAME_RELATED_P (insn) = 1;
-	      aarch64_set_frame_expr (gen_rtx_SET
-				      (Pmode, stack_pointer_rtx,
-				       gen_rtx_MINUS (Pmode,
-						      stack_pointer_rtx,
-						      GEN_INT (offset))));
-	      mem_fp = gen_frame_mem (DImode,
-				      plus_constant (Pmode,
-						     stack_pointer_rtx,
-						     fp_offset));
-	      mem_lr = gen_frame_mem (DImode,
-				      plus_constant (Pmode,
-						     stack_pointer_rtx,
-						     fp_offset
-						     + UNITS_PER_WORD));
-	      insn = emit_insn (gen_store_pairdi (mem_fp,
-						  hard_frame_pointer_rtx,
-						  mem_lr,
-						  gen_rtx_REG (DImode,
-							       LR_REGNUM)));
+
+	      aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
+					 R30_REGNUM, false);
 	    }
 	  else
-	    {
-	      insn = emit_insn (gen_storewb_pairdi_di
-				(stack_pointer_rtx, stack_pointer_rtx,
-				 hard_frame_pointer_rtx,
-				 gen_rtx_REG (DImode, LR_REGNUM),
-				 GEN_INT (-offset),
-				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
-	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-	    }
+	    aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
 
-	  /* The first part of a frame-related parallel insn is always
-	     assumed to be relevant to the frame calculations;
-	     subsequent parts, are only frame-related if explicitly
-	     marked.  */
-	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-	  RTX_FRAME_RELATED_P (insn) = 1;
-
 	  /* Set up frame pointer to point to the location of the
 	     previous frame pointer on the stack.  */
 	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
 					   stack_pointer_rtx,
 					   GEN_INT (fp_offset)));
-	  aarch64_set_frame_expr (gen_rtx_SET
-				  (Pmode, hard_frame_pointer_rtx,
-				   plus_constant (Pmode,
-						  stack_pointer_rtx,
-						  fp_offset)));
 	  RTX_FRAME_RELATED_P (insn) = 1;
-	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
-					   hard_frame_pointer_rtx));
+	  emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
 	}
       else
 	{
-	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
-					   GEN_INT (-offset)));
-	  RTX_FRAME_RELATED_P (insn) = 1;
+	  unsigned reg1 = cfun->machine->frame.wb_candidate1;
+	  unsigned reg2 = cfun->machine->frame.wb_candidate2;
+
+	  if (fp_offset
+	      || reg1 == FIRST_PSEUDO_REGISTER
+	      || (reg2 == FIRST_PSEUDO_REGISTER
+		  && offset >= 256))
+	    {
+	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+					       GEN_INT (-offset)));
+	      RTX_FRAME_RELATED_P (insn) = 1;
+	    }
+	  else
+	    {
+	      enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
+
+	      skip_wb = true;
+
+	      if (reg2 == FIRST_PSEUDO_REGISTER)
+		aarch64_pushwb_single_reg (mode1, reg1, offset);
+	      else
+		aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
+	    }
 	}
 
-      aarch64_save_or_restore_callee_save_registers
-	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
+      aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
+				 skip_wb);
+      aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
+				 skip_wb);
     }
 
   /* when offset >= 512,
@@ -2204,28 +2547,24 @@
 void
 aarch64_expand_epilogue (bool for_sibcall)
 {
-  HOST_WIDE_INT original_frame_size, frame_size, offset;
+  HOST_WIDE_INT frame_size, offset;
   HOST_WIDE_INT fp_offset;
+  HOST_WIDE_INT hard_fp_offset;
   rtx insn;
-  rtx cfa_reg;
+  /* We need to add memory barrier to prevent read from deallocated stack.  */
+  bool need_barrier_p = (get_frame_size () != 0
+			 || cfun->machine->frame.saved_varargs_size);
 
   aarch64_layout_frame ();
-  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
-  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
-		+ crtl->outgoing_args_size);
-  offset = frame_size = AARCH64_ROUND_UP (frame_size,
-					  STACK_BOUNDARY / BITS_PER_UNIT);
 
-  fp_offset = (offset
-	       - original_frame_size
-	       - cfun->machine->frame.saved_regs_size);
+  offset = frame_size = cfun->machine->frame.frame_size;
+  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
+  fp_offset = frame_size - hard_fp_offset;
 
-  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
-
   /* Store pairs and load pairs have a range only -512 to 504.  */
   if (offset >= 512)
     {
-      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
+      offset = hard_fp_offset;
       if (offset >= 512)
 	offset = cfun->machine->frame.saved_regs_size;
 
@@ -2249,74 +2588,59 @@
   if (frame_pointer_needed
       && (crtl->outgoing_args_size || cfun->calls_alloca))
     {
+      if (cfun->calls_alloca)
+	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+
       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
 				       hard_frame_pointer_rtx,
-				       GEN_INT (- fp_offset)));
-      RTX_FRAME_RELATED_P (insn) = 1;
-      /* As SP is set to (FP - fp_offset), according to the rules in
-	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
-	 from the value of SP from now on.  */
-      cfa_reg = stack_pointer_rtx;
+				       GEN_INT (0)));
+      offset = offset - fp_offset;
     }
 
-  aarch64_save_or_restore_callee_save_registers
-    (fp_offset + cfun->machine->frame.hardfp_offset, 1);
-
-  /* Restore the frame pointer and lr if the frame pointer is needed.  */
   if (offset > 0)
     {
+      unsigned reg1 = cfun->machine->frame.wb_candidate1;
+      unsigned reg2 = cfun->machine->frame.wb_candidate2;
+      bool skip_wb = true;
+      rtx cfi_ops = NULL;
+
       if (frame_pointer_needed)
+	fp_offset = 0;
+      else if (fp_offset
+	       || reg1 == FIRST_PSEUDO_REGISTER
+	       || (reg2 == FIRST_PSEUDO_REGISTER
+		   && offset >= 256))
+	skip_wb = false;
+
+      aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
+				    skip_wb, &cfi_ops);
+      aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
+				    skip_wb, &cfi_ops);
+
+      if (need_barrier_p)
+	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+
+      if (skip_wb)
 	{
-	  rtx mem_fp, mem_lr;
+	  enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
+	  rtx rreg1 = gen_rtx_REG (mode1, reg1);
 
-	  if (fp_offset)
+	  cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
+	  if (reg2 == FIRST_PSEUDO_REGISTER)
 	    {
-	      mem_fp = gen_frame_mem (DImode,
-				      plus_constant (Pmode,
-						     stack_pointer_rtx,
-						     fp_offset));
-	      mem_lr = gen_frame_mem (DImode,
-				      plus_constant (Pmode,
-						     stack_pointer_rtx,
-						     fp_offset
-						     + UNITS_PER_WORD));
-	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
-						 mem_fp,
-						 gen_rtx_REG (DImode,
-							      LR_REGNUM),
-						 mem_lr));
+	      rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
+	      mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
+	      mem = gen_rtx_MEM (mode1, mem);
+	      insn = emit_move_insn (rreg1, mem);
 	    }
 	  else
 	    {
-	      insn = emit_insn (gen_loadwb_pairdi_di
-				(stack_pointer_rtx,
-				 stack_pointer_rtx,
-				 hard_frame_pointer_rtx,
-				 gen_rtx_REG (DImode, LR_REGNUM),
-				 GEN_INT (offset),
-				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
-	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
-			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
-					  plus_constant (Pmode, cfa_reg,
-							 offset))));
-	    }
+	      rtx rreg2 = gen_rtx_REG (mode1, reg2);
 
-	  /* The first part of a frame-related parallel insn
-	     is always assumed to be relevant to the frame
-	     calculations; subsequent parts, are only
-	     frame-related if explicitly marked.  */
-	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-	  RTX_FRAME_RELATED_P (insn) = 1;
-	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
-	  add_reg_note (insn, REG_CFA_RESTORE,
-			gen_rtx_REG (DImode, LR_REGNUM));
-
-	  if (fp_offset)
-	    {
-	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
-					       GEN_INT (offset)));
-	      RTX_FRAME_RELATED_P (insn) = 1;
+	      cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
+	      insn = emit_insn (aarch64_gen_loadwb_pair
+				(mode1, stack_pointer_rtx, rreg1,
+				 rreg2, offset));
 	    }
 	}
       else
@@ -2323,79 +2647,60 @@
 	{
 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
 					   GEN_INT (offset)));
-	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
-    }
 
-  /* Stack adjustment for exception handler.  */
-  if (crtl->calls_eh_return)
-    {
-      /* We need to unwind the stack by the offset computed by
-	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
-	 based on SP.  Ideally we would update the SP and define the
-	 CFA along the lines of:
-
-	 SP = SP + EH_RETURN_STACKADJ_RTX
-	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
-
-	 However the dwarf emitter only understands a constant
-	 register offset.
-
-	 The solution chosen here is to use the otherwise unused IP0
-	 as a temporary register to hold the current SP value.  The
-	 CFA is described using IP0 then SP is modified.  */
-
-      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
-
-      insn = emit_move_insn (ip0, stack_pointer_rtx);
-      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
+      /* Reset the CFA to be SP + FRAME_SIZE.  */
+      rtx new_cfa = stack_pointer_rtx;
+      if (frame_size > 0)
+	new_cfa = plus_constant (Pmode, new_cfa, frame_size);
+      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
+      REG_NOTES (insn) = cfi_ops;
       RTX_FRAME_RELATED_P (insn) = 1;
-
-      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
-
-      /* Ensure the assignment to IP0 does not get optimized away.  */
-      emit_use (ip0);
     }
 
-  if (frame_size > -1)
+  if (frame_size > 0)
     {
+      if (need_barrier_p)
+	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+
       if (frame_size >= 0x1000000)
 	{
 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
 	  emit_move_insn (op0, GEN_INT (frame_size));
-	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
-	  aarch64_set_frame_expr (gen_rtx_SET
-				  (Pmode, stack_pointer_rtx,
-				   plus_constant (Pmode,
-						  stack_pointer_rtx,
-						  frame_size)));
+	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
 	}
-      else if (frame_size > 0)
+      else
 	{
-	  if ((frame_size & 0xfff) != 0)
+          int hi_ofs = frame_size & 0xfff000;
+          int lo_ofs = frame_size & 0x000fff;
+
+	  if (hi_ofs && lo_ofs)
 	    {
 	      insn = emit_insn (gen_add2_insn
-				(stack_pointer_rtx,
-				 GEN_INT ((frame_size
-					   & (HOST_WIDE_INT) 0xfff))));
+				(stack_pointer_rtx, GEN_INT (hi_ofs)));
 	      RTX_FRAME_RELATED_P (insn) = 1;
+	      frame_size = lo_ofs;
 	    }
-	  if ((frame_size & 0xfff) != frame_size)
-	    {
-	      insn = emit_insn (gen_add2_insn
-				(stack_pointer_rtx,
-				 GEN_INT ((frame_size
-					   & ~ (HOST_WIDE_INT) 0xfff))));
-	      RTX_FRAME_RELATED_P (insn) = 1;
-	    }
+	  insn = emit_insn (gen_add2_insn
+			    (stack_pointer_rtx, GEN_INT (frame_size)));
 	}
 
-        aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
-					     plus_constant (Pmode,
-							    stack_pointer_rtx,
-							    offset)));
+      /* Reset the CFA to be SP + 0.  */
+      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
     }
 
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    {
+      /* We need to unwind the stack by the offset computed by
+	 EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
+	 to be SP; letting the CFA move during this adjustment
+	 is just as correct as retaining the CFA from the body
+	 of the function.  Therefore, do nothing special.  */
+      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
+    }
+
   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
   if (!for_sibcall)
     emit_jump_insn (ret_rtx);
@@ -2407,17 +2712,13 @@
 rtx
 aarch64_final_eh_return_addr (void)
 {
-  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
+  HOST_WIDE_INT fp_offset;
+
   aarch64_layout_frame ();
-  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
-  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
-		+ crtl->outgoing_args_size);
-  offset = frame_size = AARCH64_ROUND_UP (frame_size,
-					  STACK_BOUNDARY / BITS_PER_UNIT);
-  fp_offset = offset
-    - original_frame_size
-    - cfun->machine->frame.saved_regs_size;
 
+  fp_offset = cfun->machine->frame.frame_size
+	      - cfun->machine->frame.hard_fp_offset;
+
   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
     return gen_rtx_REG (DImode, LR_REGNUM);
 
@@ -2453,12 +2754,22 @@
 				       - 2 * UNITS_PER_WORD));
 }
 
-/* Output code to build up a constant in a register.  */
-static void
-aarch64_build_constant (int regnum, HOST_WIDE_INT val)
+/* Possibly output code to build up a constant in a register.  For
+   the benefit of the costs infrastructure, returns the number of
+   instructions which would be emitted.  GENERATE inhibits or
+   enables code generation.  */
+
+static int
+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
 {
+  int insns = 0;
+
   if (aarch64_bitmask_imm (val, DImode))
-    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
+    {
+      if (generate)
+	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
+      insns = 1;
+    }
   else
     {
       int i;
@@ -2489,15 +2800,19 @@
 	 the same.  */
       if (ncount < zcount)
 	{
-	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
-			  GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
+	  if (generate)
+	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
+			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
 	  tval = 0xffff;
+	  insns++;
 	}
       else
 	{
-	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
-			  GEN_INT (val & 0xffff));
+	  if (generate)
+	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
+			    GEN_INT (val & 0xffff));
 	  tval = 0;
+	  insns++;
 	}
 
       val >>= 16;
@@ -2505,11 +2820,17 @@
       for (i = 16; i < 64; i += 16)
 	{
 	  if ((val & 0xffff) != tval)
-	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
-				       GEN_INT (i), GEN_INT (val & 0xffff)));
+	    {
+	      if (generate)
+		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
+					   GEN_INT (i),
+					   GEN_INT (val & 0xffff)));
+	      insns++;
+	    }
 	  val >>= 16;
 	}
     }
+  return insns;
 }
 
 static void
@@ -2524,7 +2845,7 @@
 
   if (mdelta >= 4096 * 4096)
     {
-      aarch64_build_constant (scratchreg, delta);
+      (void) aarch64_build_constant (scratchreg, delta, true);
       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
     }
   else if (mdelta > 0)
@@ -2598,7 +2919,7 @@
 	  addr = plus_constant (Pmode, temp0, vcall_offset);
       else
 	{
-	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
+	  (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
 	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
 	}
 
@@ -3015,8 +3336,8 @@
   return false;
 }
 
-static inline bool
-offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
+bool
+aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
 {
   return (offset >= -64 * GET_MODE_SIZE (mode)
 	  && offset < 64 * GET_MODE_SIZE (mode)
@@ -3050,11 +3371,11 @@
   enum rtx_code code = GET_CODE (x);
   rtx op0, op1;
   bool allow_reg_index_p =
-    outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
-
+    outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
+			       || aarch64_vector_mode_supported_p (mode));
   /* Don't support anything other than POST_INC or REG addressing for
      AdvSIMD.  */
-  if (aarch64_vector_mode_p (mode)
+  if (aarch64_vect_struct_mode_p (mode)
       && (code != POST_INC && code != REG))
     return false;
 
@@ -3070,6 +3391,21 @@
     case PLUS:
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
+
+      if (! strict_p
+	  && REG_P (op0)
+	  && (op0 == virtual_stack_vars_rtx
+	      || op0 == frame_pointer_rtx
+	      || op0 == arg_pointer_rtx)
+	  && CONST_INT_P (op1))
+	{
+	  info->type = ADDRESS_REG_IMM;
+	  info->base = op0;
+	  info->offset = op1;
+
+	  return true;
+	}
+
       if (GET_MODE_SIZE (mode) != 0
 	  && CONST_INT_P (op1)
 	  && aarch64_base_register_rtx_p (op0, strict_p))
@@ -3088,12 +3424,12 @@
 	     We conservatively require an offset representable in either mode.
 	   */
 	  if (mode == TImode || mode == TFmode)
-	    return (offset_7bit_signed_scaled_p (mode, offset)
+	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
 		    && offset_9bit_signed_unscaled_p (mode, offset));
 
 	  if (outer_code == PARALLEL)
 	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
-		    && offset_7bit_signed_scaled_p (mode, offset));
+		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
 	  else
 	    return (offset_9bit_signed_unscaled_p (mode, offset)
 		    || offset_12bit_unsigned_scaled_p (mode, offset));
@@ -3148,12 +3484,12 @@
 	     We conservatively require an offset representable in either mode.
 	   */
 	  if (mode == TImode || mode == TFmode)
-	    return (offset_7bit_signed_scaled_p (mode, offset)
+	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
 		    && offset_9bit_signed_unscaled_p (mode, offset));
 
 	  if (outer_code == PARALLEL)
 	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
-		    && offset_7bit_signed_scaled_p (mode, offset));
+		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
 	  else
 	    return offset_9bit_signed_unscaled_p (mode, offset);
 	}
@@ -3337,7 +3673,7 @@
      the comparison will have to be swapped when we emit the assembly
      code.  */
   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
-      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
+      && (REG_P (y) || GET_CODE (y) == SUBREG)
       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
 	  || GET_CODE (x) == LSHIFTRT
 	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
@@ -3346,7 +3682,7 @@
   /* Similarly for a negated operand, but we can only do this for
      equalities.  */
   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
-      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
+      && (REG_P (y) || GET_CODE (y) == SUBREG)
       && (code == EQ || code == NE)
       && GET_CODE (x) == NEG)
     return CC_Zmode;
@@ -3363,7 +3699,7 @@
   return CCmode;
 }
 
-static unsigned
+int
 aarch64_get_condition_code (rtx x)
 {
   enum machine_mode mode = GET_MODE (XEXP (x, 0));
@@ -3390,7 +3726,7 @@
 	case UNLE: return AARCH64_LE;
 	case UNGT: return AARCH64_HI;
 	case UNGE: return AARCH64_PL;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
       break;
 
@@ -3407,7 +3743,7 @@
 	case GTU: return AARCH64_HI;
 	case LEU: return AARCH64_LS;
 	case LTU: return AARCH64_CC;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
       break;
 
@@ -3426,7 +3762,7 @@
 	case GTU: return AARCH64_CC;
 	case LEU: return AARCH64_CS;
 	case LTU: return AARCH64_HI;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
       break;
 
@@ -3437,7 +3773,7 @@
 	case EQ: return AARCH64_EQ;
 	case GE: return AARCH64_PL;
 	case LT: return AARCH64_MI;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
       break;
 
@@ -3446,16 +3782,46 @@
 	{
 	case NE: return AARCH64_NE;
 	case EQ: return AARCH64_EQ;
-	default: gcc_unreachable ();
+	default: return -1;
 	}
       break;
 
     default:
-      gcc_unreachable ();
+      return -1;
       break;
     }
 }
 
+bool
+aarch64_const_vec_all_same_in_range_p (rtx x,
+				  HOST_WIDE_INT minval,
+				  HOST_WIDE_INT maxval)
+{
+  HOST_WIDE_INT firstval;
+  int count, i;
+
+  if (GET_CODE (x) != CONST_VECTOR
+      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
+    return false;
+
+  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
+  if (firstval < minval || firstval > maxval)
+    return false;
+
+  count = CONST_VECTOR_NUNITS (x);
+  for (i = 1; i < count; i++)
+    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
+      return false;
+
+  return true;
+}
+
+bool
+aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
+{
+  return aarch64_const_vec_all_same_in_range_p (x, val, val);
+}
+
 static unsigned
 bit_count (unsigned HOST_WIDE_INT value)
 {
@@ -3506,7 +3872,7 @@
       {
 	int n;
 
-	if (GET_CODE (x) != CONST_INT
+	if (!CONST_INT_P (x)
 	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
 	  {
 	    output_operand_lossage ("invalid operand for '%%%c'", code);
@@ -3536,7 +3902,7 @@
 	int n;
 
 	/* Print N such that 2^N == X.  */
-	if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
+	if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
 	  {
 	    output_operand_lossage ("invalid operand for '%%%c'", code);
 	    return;
@@ -3548,7 +3914,7 @@
 
     case 'P':
       /* Print the number of non-zero bits in X (a const_int).  */
-      if (GET_CODE (x) != CONST_INT)
+      if (!CONST_INT_P (x))
 	{
 	  output_operand_lossage ("invalid operand for '%%%c'", code);
 	  return;
@@ -3559,7 +3925,7 @@
 
     case 'H':
       /* Print the higher numbered register of a pair (TImode) of regs.  */
-      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
+      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
 	{
 	  output_operand_lossage ("invalid operand for '%%%c'", code);
 	  return;
@@ -3569,39 +3935,48 @@
       break;
 
     case 'm':
-      /* Print a condition (eq, ne, etc).  */
+      {
+        int cond_code;
+        /* Print a condition (eq, ne, etc).  */
 
-      /* CONST_TRUE_RTX means always -- that's the default.  */
-      if (x == const_true_rtx)
-	return;
-
-      if (!COMPARISON_P (x))
-	{
-	  output_operand_lossage ("invalid operand for '%%%c'", code);
+        /* CONST_TRUE_RTX means always -- that's the default.  */
+        if (x == const_true_rtx)
 	  return;
-	}
 
-      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
+        if (!COMPARISON_P (x))
+	  {
+	    output_operand_lossage ("invalid operand for '%%%c'", code);
+	    return;
+	  }
+
+        cond_code = aarch64_get_condition_code (x);
+        gcc_assert (cond_code >= 0);
+        fputs (aarch64_condition_codes[cond_code], f);
+      }
       break;
 
     case 'M':
-      /* Print the inverse of a condition (eq <-> ne, etc).  */
+      {
+        int cond_code;
+        /* Print the inverse of a condition (eq <-> ne, etc).  */
 
-      /* CONST_TRUE_RTX means never -- that's the default.  */
-      if (x == const_true_rtx)
-	{
-	  fputs ("nv", f);
-	  return;
-	}
+        /* CONST_TRUE_RTX means never -- that's the default.  */
+        if (x == const_true_rtx)
+	  {
+	    fputs ("nv", f);
+	    return;
+	  }
 
-      if (!COMPARISON_P (x))
-	{
-	  output_operand_lossage ("invalid operand for '%%%c'", code);
-	  return;
-	}
-
-      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
-				  (aarch64_get_condition_code (x))], f);
+        if (!COMPARISON_P (x))
+	  {
+	    output_operand_lossage ("invalid operand for '%%%c'", code);
+	    return;
+	  }
+        cond_code = aarch64_get_condition_code (x);
+        gcc_assert (cond_code >= 0);
+        fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
+                                       (cond_code)], f);
+      }
       break;
 
     case 'b':
@@ -3633,7 +4008,7 @@
 
     case 'X':
       /* Print bottom 16 bits of integer constant in hex.  */
-      if (GET_CODE (x) != CONST_INT)
+      if (!CONST_INT_P (x))
 	{
 	  output_operand_lossage ("invalid operand for '%%%c'", code);
 	  return;
@@ -3698,9 +4073,10 @@
 	case CONST_VECTOR:
 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
 	    {
-	      gcc_assert (aarch64_const_vec_all_same_int_p (x,
-							    HOST_WIDE_INT_MIN,
-							    HOST_WIDE_INT_MAX));
+	      gcc_assert (
+		  aarch64_const_vec_all_same_in_range_p (x,
+							 HOST_WIDE_INT_MIN,
+							 HOST_WIDE_INT_MAX));
 	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
 	    }
 	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
@@ -3843,34 +4219,34 @@
 	if (addr.offset == const0_rtx)
 	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
 	else
-	  asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
 		       INTVAL (addr.offset));
 	return;
 
       case ADDRESS_REG_REG:
 	if (addr.shift == 0)
-	  asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
 		       reg_names [REGNO (addr.offset)]);
 	else
-	  asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
 		       reg_names [REGNO (addr.offset)], addr.shift);
 	return;
 
       case ADDRESS_REG_UXTW:
 	if (addr.shift == 0)
-	  asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
 		       REGNO (addr.offset) - R0_REGNUM);
 	else
-	  asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
 		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
 	return;
 
       case ADDRESS_REG_SXTW:
 	if (addr.shift == 0)
-	  asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
 		       REGNO (addr.offset) - R0_REGNUM);
 	else
-	  asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
+	  asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
 		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
 	return;
 
@@ -3878,27 +4254,27 @@
 	switch (GET_CODE (x))
 	  {
 	  case PRE_INC:
-	    asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], 
+	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
 	    return;
 	  case POST_INC:
-	    asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
 	    return;
 	  case PRE_DEC:
-	    asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
 	    return;
 	  case POST_DEC:
-	    asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
 	    return;
 	  case PRE_MODIFY:
-	    asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
 			 INTVAL (addr.offset));
 	    return;
 	  case POST_MODIFY:
-	    asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
+	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
 			 INTVAL (addr.offset));
 	    return;
 	  default:
@@ -3907,7 +4283,7 @@
 	break;
 
       case ADDRESS_LO_SUM:
-	asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
+	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
 	output_addr_const (f, addr.offset);
 	asm_fprintf (f, "]");
 	return;
@@ -3958,7 +4334,7 @@
 aarch64_regno_regclass (unsigned regno)
 {
   if (GP_REGNUM_P (regno))
-    return CORE_REGS;
+    return GENERAL_REGS;
 
   if (regno == SP_REGNUM)
     return STACK_REG;
@@ -3973,6 +4349,47 @@
   return NO_REGS;
 }
 
+static rtx
+aarch64_legitimize_address (rtx x, rtx /* orig_x  */, enum machine_mode mode)
+{
+  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
+     where mask is selected by alignment and size of the offset.
+     We try to pick as large a range for the offset as possible to
+     maximize the chance of a CSE.  However, for aligned addresses
+     we limit the range to 4k so that structures with different sized
+     elements are likely to use the same base.  */
+
+  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
+    {
+      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
+      HOST_WIDE_INT base_offset;
+
+      /* Does it look like we'll need a load/store-pair operation?  */
+      if (GET_MODE_SIZE (mode) > 16
+	  || mode == TImode)
+	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
+		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      /* For offsets aren't a multiple of the access size, the limit is
+	 -256...255.  */
+      else if (offset & (GET_MODE_SIZE (mode) - 1))
+	base_offset = (offset + 0x100) & ~0x1ff;
+      else
+	base_offset = offset & ~0xfff;
+
+      if (base_offset == 0)
+	return x;
+
+      offset -= base_offset;
+      rtx base_reg = gen_reg_rtx (Pmode);
+      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
+			   NULL_RTX);
+      emit_move_insn (base_reg, val);
+      x = plus_constant (Pmode, base_reg, offset);
+    }
+
+  return x;
+}
+
 /* Try a machine-dependent way of reloading an illegitimate address
    operand.  If we find one, push the reload and return the new rtx.  */
 
@@ -3984,8 +4401,8 @@
 {
   rtx x = *x_p;
 
-  /* Do not allow mem (plus (reg, const)) if vector mode.  */
-  if (aarch64_vector_mode_p (mode)
+  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
+  if (aarch64_vect_struct_mode_p (mode)
       && GET_CODE (x) == PLUS
       && REG_P (XEXP (x, 0))
       && CONST_INT_P (XEXP (x, 1)))
@@ -4109,12 +4526,12 @@
   /* A TFmode or TImode memory access should be handled via an FP_REGS
      because AArch64 has richer addressing modes for LDR/STR instructions
      than LDP/STP instructions.  */
-  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
+  if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
     return FP_REGS;
 
   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
-      return CORE_REGS;
+      return GENERAL_REGS;
 
   return NO_REGS;
 }
@@ -4139,6 +4556,16 @@
 
       return false;
     }
+  else
+    {
+      /* If we decided that we didn't need a leaf frame pointer but then used
+	 LR in the function, then we'll want a frame pointer after all, so
+	 prevent this elimination to ensure a frame pointer is used.  */
+      if (to == STACK_POINTER_REGNUM
+	  && flag_omit_leaf_frame_pointer
+	  && df_regs_ever_live_p (LR_REGNUM))
+	return false;
+    }
 
   return true;
 }
@@ -4146,43 +4573,28 @@
 HOST_WIDE_INT
 aarch64_initial_elimination_offset (unsigned from, unsigned to)
 {
-  HOST_WIDE_INT frame_size;
-  HOST_WIDE_INT offset;
-
   aarch64_layout_frame ();
-  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
-		+ crtl->outgoing_args_size
-		+ cfun->machine->saved_varargs_size);
 
-   frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
-   offset = frame_size;
+  if (to == HARD_FRAME_POINTER_REGNUM)
+    {
+      if (from == ARG_POINTER_REGNUM)
+	return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
 
-   if (to == HARD_FRAME_POINTER_REGNUM)
-     {
-       if (from == ARG_POINTER_REGNUM)
-	 return offset - crtl->outgoing_args_size;
+      if (from == FRAME_POINTER_REGNUM)
+	return (cfun->machine->frame.hard_fp_offset
+		- cfun->machine->frame.saved_varargs_size);
+    }
 
-       if (from == FRAME_POINTER_REGNUM)
-	 return cfun->machine->frame.saved_regs_size + get_frame_size ();
-     }
+  if (to == STACK_POINTER_REGNUM)
+    {
+      if (from == FRAME_POINTER_REGNUM)
+	  return (cfun->machine->frame.frame_size
+		  - cfun->machine->frame.saved_varargs_size);
+    }
 
-   if (to == STACK_POINTER_REGNUM)
-     {
-       if (from == FRAME_POINTER_REGNUM)
-         {
-           HOST_WIDE_INT elim = crtl->outgoing_args_size
-                              + cfun->machine->frame.saved_regs_size
-                              + get_frame_size ()
-                              - cfun->machine->frame.fp_lr_offset;
-           elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
-           return elim;
-         }
-     }
-
-   return offset;
+  return cfun->machine->frame.frame_size;
 }
 
-
 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
    previous frame.  */
 
@@ -4246,7 +4658,7 @@
 {
   switch (regclass)
     {
-    case CORE_REGS:
+    case CALLER_SAVE_REGS:
     case POINTER_REGS:
     case GENERAL_REGS:
     case ALL_REGS:
@@ -4447,9 +4859,13 @@
 {
   rtx op = x;
 
+  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
+     we can convert both to ROR during final output.  */
   if ((GET_CODE (op) == ASHIFT
        || GET_CODE (op) == ASHIFTRT
-       || GET_CODE (op) == LSHIFTRT)
+       || GET_CODE (op) == LSHIFTRT
+       || GET_CODE (op) == ROTATERT
+       || GET_CODE (op) == ROTATE)
       && CONST_INT_P (XEXP (op, 1)))
     return XEXP (op, 0);
 
@@ -4461,12 +4877,12 @@
   return x;
 }
 
-/* Helper function for rtx cost calculation.  Strip a shift or extend
+/* Helper function for rtx cost calculation.  Strip an extend
    expression from X.  Returns the inner operand if successful, or the
    original expression on failure.  We deal with a number of possible
    canonicalization variations here.  */
 static rtx
-aarch64_strip_shift_or_extend (rtx x)
+aarch64_strip_extend (rtx x)
 {
   rtx op = x;
 
@@ -4473,6 +4889,7 @@
   /* Zero and sign extraction of a widened value.  */
   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
       && XEXP (op, 2) == const0_rtx
+      && GET_CODE (XEXP (op, 0)) == MULT
       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
 					 XEXP (op, 1)))
     return XEXP (XEXP (op, 0), 0);
@@ -4501,9 +4918,335 @@
   if (op != x)
     return op;
 
-  return aarch64_strip_shift (x);
+  return x;
 }
 
+/* Helper function for rtx cost calculation.  Calculate the cost of
+   a MULT, which may be part of a multiply-accumulate rtx.  Return
+   the calculated cost of the expression, recursing manually in to
+   operands where needed.  */
+
+static int
+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
+{
+  rtx op0, op1;
+  const struct cpu_cost_table *extra_cost
+    = aarch64_tune_params->insn_extra_cost;
+  int cost = 0;
+  bool maybe_fma = (outer == PLUS || outer == MINUS);
+  enum machine_mode mode = GET_MODE (x);
+
+  gcc_checking_assert (code == MULT);
+
+  op0 = XEXP (x, 0);
+  op1 = XEXP (x, 1);
+
+  if (VECTOR_MODE_P (mode))
+    mode = GET_MODE_INNER (mode);
+
+  /* Integer multiply/fma.  */
+  if (GET_MODE_CLASS (mode) == MODE_INT)
+    {
+      /* The multiply will be canonicalized as a shift, cost it as such.  */
+      if (CONST_INT_P (op1)
+	  && exact_log2 (INTVAL (op1)) > 0)
+	{
+	  if (speed)
+	    {
+	      if (maybe_fma)
+		/* ADD (shifted register).  */
+		cost += extra_cost->alu.arith_shift;
+	      else
+		/* LSL (immediate).  */
+		cost += extra_cost->alu.shift;
+	    }
+
+	  cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
+
+	  return cost;
+	}
+
+      /* Integer multiplies or FMAs have zero/sign extending variants.  */
+      if ((GET_CODE (op0) == ZERO_EXTEND
+	   && GET_CODE (op1) == ZERO_EXTEND)
+	  || (GET_CODE (op0) == SIGN_EXTEND
+	      && GET_CODE (op1) == SIGN_EXTEND))
+	{
+	  cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
+		  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
+
+	  if (speed)
+	    {
+	      if (maybe_fma)
+		/* MADD/SMADDL/UMADDL.  */
+		cost += extra_cost->mult[0].extend_add;
+	      else
+		/* MUL/SMULL/UMULL.  */
+		cost += extra_cost->mult[0].extend;
+	    }
+
+	  return cost;
+	}
+
+      /* This is either an integer multiply or an FMA.  In both cases
+	 we want to recurse and cost the operands.  */
+      cost += rtx_cost (op0, MULT, 0, speed)
+	      + rtx_cost (op1, MULT, 1, speed);
+
+      if (speed)
+	{
+	  if (maybe_fma)
+	    /* MADD.  */
+	    cost += extra_cost->mult[mode == DImode].add;
+	  else
+	    /* MUL.  */
+	    cost += extra_cost->mult[mode == DImode].simple;
+	}
+
+      return cost;
+    }
+  else
+    {
+      if (speed)
+	{
+	  /* Floating-point FMA/FMUL can also support negations of the
+	     operands.  */
+	  if (GET_CODE (op0) == NEG)
+	    op0 = XEXP (op0, 0);
+	  if (GET_CODE (op1) == NEG)
+	    op1 = XEXP (op1, 0);
+
+	  if (maybe_fma)
+	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
+	    cost += extra_cost->fp[mode == DFmode].fma;
+	  else
+	    /* FMUL/FNMUL.  */
+	    cost += extra_cost->fp[mode == DFmode].mult;
+	}
+
+      cost += rtx_cost (op0, MULT, 0, speed)
+	      + rtx_cost (op1, MULT, 1, speed);
+      return cost;
+    }
+}
+
+static int
+aarch64_address_cost (rtx x,
+		      enum machine_mode mode,
+		      addr_space_t as ATTRIBUTE_UNUSED,
+		      bool speed)
+{
+  enum rtx_code c = GET_CODE (x);
+  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
+  struct aarch64_address_info info;
+  int cost = 0;
+  info.shift = 0;
+
+  if (!aarch64_classify_address (&info, x, mode, c, false))
+    {
+      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
+	{
+	  /* This is a CONST or SYMBOL ref which will be split
+	     in a different way depending on the code model in use.
+	     Cost it through the generic infrastructure.  */
+	  int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
+	  /* Divide through by the cost of one instruction to
+	     bring it to the same units as the address costs.  */
+	  cost_symbol_ref /= COSTS_N_INSNS (1);
+	  /* The cost is then the cost of preparing the address,
+	     followed by an immediate (possibly 0) offset.  */
+	  return cost_symbol_ref + addr_cost->imm_offset;
+	}
+      else
+	{
+	  /* This is most likely a jump table from a case
+	     statement.  */
+	  return addr_cost->register_offset;
+	}
+    }
+
+  switch (info.type)
+    {
+      case ADDRESS_LO_SUM:
+      case ADDRESS_SYMBOLIC:
+      case ADDRESS_REG_IMM:
+	cost += addr_cost->imm_offset;
+	break;
+
+      case ADDRESS_REG_WB:
+	if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
+	  cost += addr_cost->pre_modify;
+	else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
+	  cost += addr_cost->post_modify;
+	else
+	  gcc_unreachable ();
+
+	break;
+
+      case ADDRESS_REG_REG:
+	cost += addr_cost->register_offset;
+	break;
+
+      case ADDRESS_REG_UXTW:
+      case ADDRESS_REG_SXTW:
+	cost += addr_cost->register_extend;
+	break;
+
+      default:
+	gcc_unreachable ();
+    }
+
+
+  if (info.shift > 0)
+    {
+      /* For the sake of calculating the cost of the shifted register
+	 component, we can treat same sized modes in the same way.  */
+      switch (GET_MODE_BITSIZE (mode))
+	{
+	  case 16:
+	    cost += addr_cost->addr_scale_costs.hi;
+	    break;
+
+	  case 32:
+	    cost += addr_cost->addr_scale_costs.si;
+	    break;
+
+	  case 64:
+	    cost += addr_cost->addr_scale_costs.di;
+	    break;
+
+	  /* We can't tell, or this is a 128-bit vector.  */
+	  default:
+	    cost += addr_cost->addr_scale_costs.ti;
+	    break;
+	}
+    }
+
+  return cost;
+}
+
+/* Return true if the RTX X in mode MODE is a zero or sign extract
+   usable in an ADD or SUB (extended register) instruction.  */
+static bool
+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
+{
+  /* Catch add with a sign extract.
+     This is add_<optab><mode>_multp2.  */
+  if (GET_CODE (x) == SIGN_EXTRACT
+      || GET_CODE (x) == ZERO_EXTRACT)
+    {
+      rtx op0 = XEXP (x, 0);
+      rtx op1 = XEXP (x, 1);
+      rtx op2 = XEXP (x, 2);
+
+      if (GET_CODE (op0) == MULT
+	  && CONST_INT_P (op1)
+	  && op2 == const0_rtx
+	  && CONST_INT_P (XEXP (op0, 1))
+	  && aarch64_is_extend_from_extract (mode,
+					     XEXP (op0, 1),
+					     op1))
+	{
+	  return true;
+	}
+    }
+
+  return false;
+}
+
+static bool
+aarch64_frint_unspec_p (unsigned int u)
+{
+  switch (u)
+    {
+      case UNSPEC_FRINTZ:
+      case UNSPEC_FRINTP:
+      case UNSPEC_FRINTM:
+      case UNSPEC_FRINTA:
+      case UNSPEC_FRINTN:
+      case UNSPEC_FRINTX:
+      case UNSPEC_FRINTI:
+        return true;
+
+      default:
+        return false;
+    }
+}
+
+/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
+   storing it in *COST.  Result is true if the total cost of the operation
+   has now been calculated.  */
+static bool
+aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
+{
+  rtx inner;
+  rtx comparator;
+  enum rtx_code cmpcode;
+
+  if (COMPARISON_P (op0))
+    {
+      inner = XEXP (op0, 0);
+      comparator = XEXP (op0, 1);
+      cmpcode = GET_CODE (op0);
+    }
+  else
+    {
+      inner = op0;
+      comparator = const0_rtx;
+      cmpcode = NE;
+    }
+
+  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
+    {
+      /* Conditional branch.  */
+      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+	return true;
+      else
+	{
+	  if (cmpcode == NE || cmpcode == EQ)
+	    {
+	      if (comparator == const0_rtx)
+		{
+		  /* TBZ/TBNZ/CBZ/CBNZ.  */
+		  if (GET_CODE (inner) == ZERO_EXTRACT)
+		    /* TBZ/TBNZ.  */
+		    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
+			 	       0, speed);
+		else
+		  /* CBZ/CBNZ.  */
+		  *cost += rtx_cost (inner, cmpcode, 0, speed);
+
+	        return true;
+	      }
+	    }
+	  else if (cmpcode == LT || cmpcode == GE)
+	    {
+	      /* TBZ/TBNZ.  */
+	      if (comparator == const0_rtx)
+		return true;
+	    }
+	}
+    }
+  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+    {
+      /* It's a conditional operation based on the status flags,
+	 so it must be some flavor of CSEL.  */
+
+      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
+      if (GET_CODE (op1) == NEG
+          || GET_CODE (op1) == NOT
+          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
+	op1 = XEXP (op1, 0);
+
+      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
+      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
+      return true;
+    }
+
+  /* We don't know what this is, cost all operands.  */
+  return false;
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
@@ -4510,13 +5253,31 @@
 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
 {
-  rtx op0, op1;
+  rtx op0, op1, op2;
   const struct cpu_cost_table *extra_cost
     = aarch64_tune_params->insn_extra_cost;
+  enum machine_mode mode = GET_MODE (x);
 
+  /* By default, assume that everything has equivalent cost to the
+     cheapest instruction.  Any additional costs are applied as a delta
+     above this default.  */
+  *cost = COSTS_N_INSNS (1);
+
+  /* TODO: The cost infrastructure currently does not handle
+     vector operations.  Assume that all vector operations
+     are equally expensive.  */
+  if (VECTOR_MODE_P (mode))
+    {
+      if (speed)
+	*cost += extra_cost->vect.alu;
+      return true;
+    }
+
   switch (code)
     {
     case SET:
+      /* The cost depends entirely on the operands to SET.  */
+      *cost = 0;
       op0 = SET_DEST (x);
       op1 = SET_SRC (x);
 
@@ -4524,52 +5285,194 @@
 	{
 	case MEM:
 	  if (speed)
-	    *cost += extra_cost->ldst.store;
+	    {
+	      rtx address = XEXP (op0, 0);
+	      if (GET_MODE_CLASS (mode) == MODE_INT)
+		*cost += extra_cost->ldst.store;
+	      else if (mode == SFmode)
+		*cost += extra_cost->ldst.storef;
+	      else if (mode == DFmode)
+		*cost += extra_cost->ldst.stored;
 
-	  if (op1 != const0_rtx)
-	    *cost += rtx_cost (op1, SET, 1, speed);
+	      *cost +=
+		COSTS_N_INSNS (aarch64_address_cost (address, mode,
+						     0, speed));
+	    }
+
+	  *cost += rtx_cost (op1, SET, 1, speed);
 	  return true;
 
 	case SUBREG:
 	  if (! REG_P (SUBREG_REG (op0)))
 	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
+
 	  /* Fall through.  */
 	case REG:
-	  /* Cost is just the cost of the RHS of the set.  */
-	  *cost += rtx_cost (op1, SET, 1, true);
+	  /* const0_rtx is in general free, but we will use an
+	     instruction to set a register to 0.  */
+          if (REG_P (op1) || op1 == const0_rtx)
+            {
+              /* The cost is 1 per register copied.  */
+              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+			      / UNITS_PER_WORD;
+              *cost = COSTS_N_INSNS (n_minus_1 + 1);
+            }
+          else
+	    /* Cost is just the cost of the RHS of the set.  */
+	    *cost += rtx_cost (op1, SET, 1, speed);
 	  return true;
 
-	case ZERO_EXTRACT:  /* Bit-field insertion.  */
+	case ZERO_EXTRACT:
 	case SIGN_EXTRACT:
-	  /* Strip any redundant widening of the RHS to meet the width of
-	     the target.  */
+	  /* Bit-field insertion.  Strip any redundant widening of
+	     the RHS to meet the width of the target.  */
 	  if (GET_CODE (op1) == SUBREG)
 	    op1 = SUBREG_REG (op1);
 	  if ((GET_CODE (op1) == ZERO_EXTEND
 	       || GET_CODE (op1) == SIGN_EXTEND)
-	      && GET_CODE (XEXP (op0, 1)) == CONST_INT
+	      && CONST_INT_P (XEXP (op0, 1))
 	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
 		  >= INTVAL (XEXP (op0, 1))))
 	    op1 = XEXP (op1, 0);
-	  *cost += rtx_cost (op1, SET, 1, speed);
+
+          if (CONST_INT_P (op1))
+            {
+              /* MOV immediate is assumed to always be cheap.  */
+              *cost = COSTS_N_INSNS (1);
+            }
+          else
+            {
+              /* BFM.  */
+	      if (speed)
+		*cost += extra_cost->alu.bfi;
+              *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
+            }
+
 	  return true;
 
 	default:
-	  break;
+	  /* We can't make sense of this, assume default cost.  */
+          *cost = COSTS_N_INSNS (1);
+	  return false;
 	}
       return false;
 
+    case CONST_INT:
+      /* If an instruction can incorporate a constant within the
+	 instruction, the instruction's expression avoids calling
+	 rtx_cost() on the constant.  If rtx_cost() is called on a
+	 constant, then it is usually because the constant must be
+	 moved into a register by one or more instructions.
+
+	 The exception is constant 0, which can be expressed
+	 as XZR/WZR and is therefore free.  The exception to this is
+	 if we have (set (reg) (const0_rtx)) in which case we must cost
+	 the move.  However, we can catch that when we cost the SET, so
+	 we don't need to consider that here.  */
+      if (x == const0_rtx)
+	*cost = 0;
+      else
+	{
+	  /* To an approximation, building any other constant is
+	     proportionally expensive to the number of instructions
+	     required to build that constant.  This is true whether we
+	     are compiling for SPEED or otherwise.  */
+	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
+				 (NULL_RTX, x, false, mode));
+	}
+      return true;
+
+    case CONST_DOUBLE:
+      if (speed)
+	{
+	  /* mov[df,sf]_aarch64.  */
+	  if (aarch64_float_const_representable_p (x))
+	    /* FMOV (scalar immediate).  */
+	    *cost += extra_cost->fp[mode == DFmode].fpconst;
+	  else if (!aarch64_float_const_zero_rtx_p (x))
+	    {
+	      /* This will be a load from memory.  */
+	      if (mode == DFmode)
+		*cost += extra_cost->ldst.loadd;
+	      else
+		*cost += extra_cost->ldst.loadf;
+	    }
+	  else
+	    /* Otherwise this is +0.0.  We get this using MOVI d0, #0
+	       or MOV v0.s[0], wzr - neither of which are modeled by the
+	       cost tables.  Just use the default cost.  */
+	    {
+	    }
+	}
+
+      return true;
+
     case MEM:
       if (speed)
-	*cost += extra_cost->ldst.load;
+	{
+	  /* For loads we want the base cost of a load, plus an
+	     approximation for the additional cost of the addressing
+	     mode.  */
+	  rtx address = XEXP (x, 0);
+	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	    *cost += extra_cost->ldst.load;
+	  else if (mode == SFmode)
+	    *cost += extra_cost->ldst.loadf;
+	  else if (mode == DFmode)
+	    *cost += extra_cost->ldst.loadd;
 
+	  *cost +=
+		COSTS_N_INSNS (aarch64_address_cost (address, mode,
+						     0, speed));
+	}
+
       return true;
 
     case NEG:
-      op0 = CONST0_RTX (GET_MODE (x));
-      op1 = XEXP (x, 0);
-      goto cost_minus;
+      op0 = XEXP (x, 0);
 
+      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+       {
+          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+            {
+              /* CSETM.  */
+              *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
+              return true;
+            }
+
+	  /* Cost this as SUB wzr, X.  */
+          op0 = CONST0_RTX (GET_MODE (x));
+          op1 = XEXP (x, 0);
+          goto cost_minus;
+        }
+
+      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+        {
+          /* Support (neg(fma...)) as a single instruction only if
+             sign of zeros is unimportant.  This matches the decision
+             making in aarch64.md.  */
+          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
+            {
+	      /* FNMADD.  */
+              *cost = rtx_cost (op0, NEG, 0, speed);
+              return true;
+            }
+	  if (speed)
+	    /* FNEG.  */
+	    *cost += extra_cost->fp[mode == DFmode].neg;
+          return false;
+        }
+
+      return false;
+
+    case CLRSB:
+    case CLZ:
+      if (speed)
+        *cost += extra_cost->alu.clz;
+
+      return false;
+
     case COMPARE:
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
@@ -4581,96 +5484,228 @@
 	  goto cost_logic;
 	}
 
-      /* Comparisons can work if the order is swapped.
-	 Canonicalization puts the more complex operation first, but
-	 we want it in op1.  */
-      if (! (REG_P (op0)
-	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
-	{
-	  op0 = XEXP (x, 1);
-	  op1 = XEXP (x, 0);
-	}
-      goto cost_minus;
+      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+        {
+          /* TODO: A write to the CC flags possibly costs extra, this
+	     needs encoding in the cost tables.  */
 
+          /* CC_ZESWPmode supports zero extend for free.  */
+          if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
+            op0 = XEXP (op0, 0);
+
+          /* ANDS.  */
+          if (GET_CODE (op0) == AND)
+            {
+              x = op0;
+              goto cost_logic;
+            }
+
+          if (GET_CODE (op0) == PLUS)
+            {
+	      /* ADDS (and CMN alias).  */
+              x = op0;
+              goto cost_plus;
+            }
+
+          if (GET_CODE (op0) == MINUS)
+            {
+	      /* SUBS.  */
+              x = op0;
+              goto cost_minus;
+            }
+
+          if (GET_CODE (op1) == NEG)
+            {
+	      /* CMN.  */
+	      if (speed)
+		*cost += extra_cost->alu.arith;
+
+              *cost += rtx_cost (op0, COMPARE, 0, speed);
+	      *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
+              return true;
+            }
+
+          /* CMP.
+
+	     Compare can freely swap the order of operands, and
+             canonicalization puts the more complex operation first.
+             But the integer MINUS logic expects the shift/extend
+             operation in op1.  */
+          if (! (REG_P (op0)
+                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
+          {
+            op0 = XEXP (x, 1);
+            op1 = XEXP (x, 0);
+          }
+          goto cost_minus;
+        }
+
+      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
+        {
+	  /* FCMP.  */
+	  if (speed)
+	    *cost += extra_cost->fp[mode == DFmode].compare;
+
+          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
+            {
+              /* FCMP supports constant 0.0 for no extra cost. */
+              return true;
+            }
+          return false;
+        }
+
+      return false;
+
     case MINUS:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
+      {
+	op0 = XEXP (x, 0);
+	op1 = XEXP (x, 1);
 
-    cost_minus:
-      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
-	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
-	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
-	{
-	  if (op0 != const0_rtx)
+cost_minus:
+	/* Detect valid immediates.  */
+	if ((GET_MODE_CLASS (mode) == MODE_INT
+	     || (GET_MODE_CLASS (mode) == MODE_CC
+		 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
+	    && CONST_INT_P (op1)
+	    && aarch64_uimm12_shift (INTVAL (op1)))
+	  {
 	    *cost += rtx_cost (op0, MINUS, 0, speed);
 
-	  if (CONST_INT_P (op1))
-	    {
-	      if (!aarch64_uimm12_shift (INTVAL (op1)))
-		*cost += rtx_cost (op1, MINUS, 1, speed);
-	    }
-	  else
-	    {
-	      op1 = aarch64_strip_shift_or_extend (op1);
-	      *cost += rtx_cost (op1, MINUS, 1, speed);
-	    }
-	  return true;
-	}
+	    if (speed)
+	      /* SUB(S) (immediate).  */
+	      *cost += extra_cost->alu.arith;
+	    return true;
 
-      return false;
+	  }
 
+	/* Look for SUB (extended register).  */
+        if (aarch64_rtx_arith_op_extract_p (op1, mode))
+	  {
+	    if (speed)
+	      *cost += extra_cost->alu.arith_shift;
+
+	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
+			       (enum rtx_code) GET_CODE (op1),
+			       0, speed);
+	    return true;
+	  }
+
+	rtx new_op1 = aarch64_strip_extend (op1);
+
+	/* Cost this as an FMA-alike operation.  */
+	if ((GET_CODE (new_op1) == MULT
+	     || GET_CODE (new_op1) == ASHIFT)
+	    && code != COMPARE)
+	  {
+	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
+					    (enum rtx_code) code,
+					    speed);
+	    *cost += rtx_cost (op0, MINUS, 0, speed);
+	    return true;
+	  }
+
+	*cost += rtx_cost (new_op1, MINUS, 1, speed);
+
+	if (speed)
+	  {
+	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	      /* SUB(S).  */
+	      *cost += extra_cost->alu.arith;
+	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	      /* FSUB.  */
+	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	  }
+	return true;
+      }
+
     case PLUS:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
+      {
+	rtx new_op0;
 
-      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-	{
-	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
-	    {
-	      *cost += rtx_cost (op0, PLUS, 0, speed);
-	    }
-	  else
-	    {
-	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);
+	op0 = XEXP (x, 0);
+	op1 = XEXP (x, 1);
 
-	      if (new_op0 == op0
-		  && GET_CODE (op0) == MULT)
-		{
-		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
-		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
-		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
-			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
-		    {
-		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
-					  speed)
-				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
-					    speed)
-				+ rtx_cost (op1, PLUS, 1, speed));
-		      if (speed)
-			*cost +=
-			  extra_cost->mult[GET_MODE (x) == DImode].extend_add;
-		      return true;
-		    }
+cost_plus:
+	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+	  {
+	    /* CSINC.  */
+	    *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
+	    *cost += rtx_cost (op1, PLUS, 1, speed);
+	    return true;
+	  }
 
-		  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-			    + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
-			    + rtx_cost (op1, PLUS, 1, speed));
+	if (GET_MODE_CLASS (mode) == MODE_INT
+	    && CONST_INT_P (op1)
+	    && aarch64_uimm12_shift (INTVAL (op1)))
+	  {
+	    *cost += rtx_cost (op0, PLUS, 0, speed);
 
-		  if (speed)
-		    *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
+	    if (speed)
+	      /* ADD (immediate).  */
+	      *cost += extra_cost->alu.arith;
+	    return true;
+	  }
 
-		  return true;
-		}
+	/* Look for ADD (extended register).  */
+        if (aarch64_rtx_arith_op_extract_p (op0, mode))
+	  {
+	    if (speed)
+	      *cost += extra_cost->alu.arith_shift;
 
-	      *cost += (rtx_cost (new_op0, PLUS, 0, speed)
-			+ rtx_cost (op1, PLUS, 1, speed));
-	    }
-	  return true;
-	}
+	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
+			       (enum rtx_code) GET_CODE (op0),
+			       0, speed);
+	    return true;
+	  }
 
+	/* Strip any extend, leave shifts behind as we will
+	   cost them through mult_cost.  */
+	new_op0 = aarch64_strip_extend (op0);
+
+	if (GET_CODE (new_op0) == MULT
+	    || GET_CODE (new_op0) == ASHIFT)
+	  {
+	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
+					    speed);
+	    *cost += rtx_cost (op1, PLUS, 1, speed);
+	    return true;
+	  }
+
+	*cost += (rtx_cost (new_op0, PLUS, 0, speed)
+		  + rtx_cost (op1, PLUS, 1, speed));
+
+	if (speed)
+	  {
+	    if (GET_MODE_CLASS (mode) == MODE_INT)
+	      /* ADD.  */
+	      *cost += extra_cost->alu.arith;
+	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	      /* FADD.  */
+	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	  }
+	return true;
+      }
+
+    case BSWAP:
+      *cost = COSTS_N_INSNS (1);
+
+      if (speed)
+        *cost += extra_cost->alu.rev;
+
       return false;
 
     case IOR:
+      if (aarch_rev16_p (x))
+        {
+          *cost = COSTS_N_INSNS (1);
+
+          if (speed)
+            *cost += extra_cost->alu.rev;
+
+          return true;
+        }
+    /* Fall through.  */
     case XOR:
     case AND:
     cost_logic:
@@ -4677,117 +5712,252 @@
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
 
+      if (code == AND
+          && GET_CODE (op0) == MULT
+          && CONST_INT_P (XEXP (op0, 1))
+          && CONST_INT_P (op1)
+          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
+                               INTVAL (op1)) != 0)
+        {
+          /* This is a UBFM/SBFM.  */
+          *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
+	  if (speed)
+	    *cost += extra_cost->alu.bfx;
+          return true;
+        }
+
       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
 	{
+	  /* We possibly get the immediate for free, this is not
+	     modelled.  */
 	  if (CONST_INT_P (op1)
 	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
 	    {
-	      *cost += rtx_cost (op0, AND, 0, speed);
+	      *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+
+	      if (speed)
+		*cost += extra_cost->alu.logical;
+
+	      return true;
 	    }
 	  else
 	    {
+	      rtx new_op0 = op0;
+
+	      /* Handle ORN, EON, or BIC.  */
 	      if (GET_CODE (op0) == NOT)
 		op0 = XEXP (op0, 0);
-	      op0 = aarch64_strip_shift (op0);
-	      *cost += (rtx_cost (op0, AND, 0, speed)
-			+ rtx_cost (op1, AND, 1, speed));
+
+	      new_op0 = aarch64_strip_shift (op0);
+
+	      /* If we had a shift on op0 then this is a logical-shift-
+		 by-register/immediate operation.  Otherwise, this is just
+		 a logical operation.  */
+	      if (speed)
+		{
+		  if (new_op0 != op0)
+		    {
+		      /* Shift by immediate.  */
+		      if (CONST_INT_P (XEXP (op0, 1)))
+			*cost += extra_cost->alu.log_shift;
+		      else
+			*cost += extra_cost->alu.log_shift_reg;
+		    }
+		  else
+		    *cost += extra_cost->alu.logical;
+		}
+
+	      /* In both cases we want to cost both operands.  */
+	      *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
+		       + rtx_cost (op1, (enum rtx_code) code, 1, speed);
+
+	      return true;
 	    }
-	  return true;
 	}
       return false;
 
+    case NOT:
+      /* MVN.  */
+      if (speed)
+	*cost += extra_cost->alu.logical;
+
+      /* The logical instruction could have the shifted register form,
+         but the cost is the same if the shift is processed as a separate
+         instruction, so we don't bother with it here.  */
+      return false;
+
     case ZERO_EXTEND:
-      if ((GET_MODE (x) == DImode
-	   && GET_MODE (XEXP (x, 0)) == SImode)
-	  || GET_CODE (XEXP (x, 0)) == MEM)
+
+      op0 = XEXP (x, 0);
+      /* If a value is written in SI mode, then zero extended to DI
+	 mode, the operation will in general be free as a write to
+	 a 'w' register implicitly zeroes the upper bits of an 'x'
+	 register.  However, if this is
+
+	   (set (reg) (zero_extend (reg)))
+
+	 we must cost the explicit register move.  */
+      if (mode == DImode
+	  && GET_MODE (op0) == SImode
+	  && outer == SET)
 	{
-	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
+	  int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
+
+	  if (!op_cost && speed)
+	    /* MOV.  */
+	    *cost += extra_cost->alu.extend;
+	  else
+	    /* Free, the cost is that of the SI mode operation.  */
+	    *cost = op_cost;
+
 	  return true;
 	}
+      else if (MEM_P (XEXP (x, 0)))
+	{
+	  /* All loads can zero extend to any size for free.  */
+	  *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
+	  return true;
+	}
+
+      /* UXTB/UXTH.  */
+      if (speed)
+	*cost += extra_cost->alu.extend;
+
       return false;
 
     case SIGN_EXTEND:
-      if (GET_CODE (XEXP (x, 0)) == MEM)
+      if (MEM_P (XEXP (x, 0)))
 	{
-	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
+	  /* LDRSH.  */
+	  if (speed)
+	    {
+	      rtx address = XEXP (XEXP (x, 0), 0);
+	      *cost += extra_cost->ldst.load_sign_extend;
+
+	      *cost +=
+		COSTS_N_INSNS (aarch64_address_cost (address, mode,
+						     0, speed));
+	    }
 	  return true;
 	}
+
+      if (speed)
+	*cost += extra_cost->alu.extend;
       return false;
 
+    case ASHIFT:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+
+      if (CONST_INT_P (op1))
+        {
+	  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
+	     aliases.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift;
+
+          /* We can incorporate zero/sign extend for free.  */
+          if (GET_CODE (op0) == ZERO_EXTEND
+              || GET_CODE (op0) == SIGN_EXTEND)
+            op0 = XEXP (op0, 0);
+
+          *cost += rtx_cost (op0, ASHIFT, 0, speed);
+          return true;
+        }
+      else
+        {
+	  /* LSLV.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift_reg;
+
+	  return false;  /* All arguments need to be in registers.  */
+        }
+
     case ROTATE:
-      if (!CONST_INT_P (XEXP (x, 1)))
-	*cost += COSTS_N_INSNS (2);
-      /* Fall through.  */
     case ROTATERT:
     case LSHIFTRT:
-    case ASHIFT:
     case ASHIFTRT:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
 
-      /* Shifting by a register often takes an extra cycle.  */
-      if (speed && !CONST_INT_P (XEXP (x, 1)))
-	*cost += extra_cost->alu.arith_shift_reg;
+      if (CONST_INT_P (op1))
+	{
+	  /* ASR (immediate) and friends.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift;
 
-      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
+	  *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+	  return true;
+	}
+      else
+	{
+
+	  /* ASR (register) and friends.  */
+	  if (speed)
+	    *cost += extra_cost->alu.shift_reg;
+
+	  return false;  /* All arguments need to be in registers.  */
+	}
+
+    case SYMBOL_REF:
+
+      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
+	{
+	  /* LDR.  */
+	  if (speed)
+	    *cost += extra_cost->ldst.load;
+	}
+      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
+	       || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
+	{
+	  /* ADRP, followed by ADD.  */
+	  *cost += COSTS_N_INSNS (1);
+	  if (speed)
+	    *cost += 2 * extra_cost->alu.arith;
+	}
+      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
+	       || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
+	{
+	  /* ADR.  */
+	  if (speed)
+	    *cost += extra_cost->alu.arith;
+	}
+
+      if (flag_pic)
+	{
+	  /* One extra load instruction, after accessing the GOT.  */
+	  *cost += COSTS_N_INSNS (1);
+	  if (speed)
+	    *cost += extra_cost->ldst.load;
+	}
       return true;
 
     case HIGH:
-      if (!CONSTANT_P (XEXP (x, 0)))
-	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
-      return true;
-
     case LO_SUM:
-      if (!CONSTANT_P (XEXP (x, 1)))
-	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
-      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
+      /* ADRP/ADD (immediate).  */
+      if (speed)
+	*cost += extra_cost->alu.arith;
       return true;
 
     case ZERO_EXTRACT:
     case SIGN_EXTRACT:
-      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
+      /* UBFX/SBFX.  */
+      if (speed)
+	*cost += extra_cost->alu.bfx;
+
+      /* We can trust that the immediates used will be correct (there
+	 are no by-register forms), so we need only cost op0.  */
+      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
       return true;
 
     case MULT:
-      op0 = XEXP (x, 0);
-      op1 = XEXP (x, 1);
+      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
+      /* aarch64_rtx_mult_cost always handles recursion to its
+	 operands.  */
+      return true;
 
-      *cost = COSTS_N_INSNS (1);
-      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-	{
-	  if (CONST_INT_P (op1)
-	      && exact_log2 (INTVAL (op1)) > 0)
-	    {
-	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
-	      return true;
-	    }
-
-	  if ((GET_CODE (op0) == ZERO_EXTEND
-	       && GET_CODE (op1) == ZERO_EXTEND)
-	      || (GET_CODE (op0) == SIGN_EXTEND
-		  && GET_CODE (op1) == SIGN_EXTEND))
-	    {
-	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
-	      if (speed)
-		*cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
-	      return true;
-	    }
-
-	  if (speed)
-	    *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
-	}
-      else if (speed)
-	{
-	  if (GET_MODE (x) == DFmode)
-	    *cost += extra_cost->fp[1].mult;
-	  else if (GET_MODE (x) == SFmode)
-	    *cost += extra_cost->fp[0].mult;
-	}
-
-      return false;  /* All arguments need to be in registers.  */
-
     case MOD:
     case UMOD:
-      *cost = COSTS_N_INSNS (2);
       if (speed)
 	{
 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
@@ -4804,53 +5974,222 @@
 
     case DIV:
     case UDIV:
-      *cost = COSTS_N_INSNS (1);
+    case SQRT:
       if (speed)
 	{
-	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-	    *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
-	  else if (GET_MODE (x) == DFmode)
-	    *cost += extra_cost->fp[1].div;
-	  else if (GET_MODE (x) == SFmode)
-	    *cost += extra_cost->fp[0].div;
+	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	    /* There is no integer SQRT, so only DIV and UDIV can get
+	       here.  */
+	    *cost += extra_cost->mult[mode == DImode].idiv;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].div;
 	}
       return false;  /* All arguments need to be in registers.  */
 
+    case IF_THEN_ELSE:
+      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
+					 XEXP (x, 2), cost, speed);
+
+    case EQ:
+    case NE:
+    case GT:
+    case GTU:
+    case LT:
+    case LTU:
+    case GE:
+    case GEU:
+    case LE:
+    case LEU:
+
+      return false; /* All arguments must be in registers.  */
+
+    case FMA:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+      op2 = XEXP (x, 2);
+
+      if (speed)
+	*cost += extra_cost->fp[mode == DFmode].fma;
+
+      /* FMSUB, FNMADD, and FNMSUB are free.  */
+      if (GET_CODE (op0) == NEG)
+        op0 = XEXP (op0, 0);
+
+      if (GET_CODE (op2) == NEG)
+        op2 = XEXP (op2, 0);
+
+      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
+	 and the by-element operand as operand 0.  */
+      if (GET_CODE (op1) == NEG)
+        op1 = XEXP (op1, 0);
+
+      /* Catch vector-by-element operations.  The by-element operand can
+	 either be (vec_duplicate (vec_select (x))) or just
+	 (vec_select (x)), depending on whether we are multiplying by
+	 a vector or a scalar.
+
+	 Canonicalization is not very good in these cases, FMA4 will put the
+	 by-element operand as operand 0, FNMA4 will have it as operand 1.  */
+      if (GET_CODE (op0) == VEC_DUPLICATE)
+	op0 = XEXP (op0, 0);
+      else if (GET_CODE (op1) == VEC_DUPLICATE)
+	op1 = XEXP (op1, 0);
+
+      if (GET_CODE (op0) == VEC_SELECT)
+	op0 = XEXP (op0, 0);
+      else if (GET_CODE (op1) == VEC_SELECT)
+	op1 = XEXP (op1, 0);
+
+      /* If the remaining parameters are not registers,
+         get the cost to put them into registers.  */
+      *cost += rtx_cost (op0, FMA, 0, speed);
+      *cost += rtx_cost (op1, FMA, 1, speed);
+      *cost += rtx_cost (op2, FMA, 2, speed);
+      return true;
+
+    case FLOAT_EXTEND:
+      if (speed)
+	*cost += extra_cost->fp[mode == DFmode].widen;
+      return false;
+
+    case FLOAT_TRUNCATE:
+      if (speed)
+	*cost += extra_cost->fp[mode == DFmode].narrow;
+      return false;
+
+    case FIX:
+    case UNSIGNED_FIX:
+      x = XEXP (x, 0);
+      /* Strip the rounding part.  They will all be implemented
+         by the fcvt* family of instructions anyway.  */
+      if (GET_CODE (x) == UNSPEC)
+        {
+          unsigned int uns_code = XINT (x, 1);
+
+          if (uns_code == UNSPEC_FRINTA
+              || uns_code == UNSPEC_FRINTM
+              || uns_code == UNSPEC_FRINTN
+              || uns_code == UNSPEC_FRINTP
+              || uns_code == UNSPEC_FRINTZ)
+            x = XVECEXP (x, 0, 0);
+        }
+
+      if (speed)
+        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+
+      *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
+      return true;
+
+    case ABS:
+      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	{
+	  /* FABS and FNEG are analogous.  */
+	  if (speed)
+	    *cost += extra_cost->fp[mode == DFmode].neg;
+	}
+      else
+	{
+	  /* Integer ABS will either be split to
+	     two arithmetic instructions, or will be an ABS
+	     (scalar), which we don't model.  */
+	  *cost = COSTS_N_INSNS (2);
+	  if (speed)
+	    *cost += 2 * extra_cost->alu.arith;
+	}
+      return false;
+
+    case SMAX:
+    case SMIN:
+      if (speed)
+	{
+	  /* FMAXNM/FMINNM/FMAX/FMIN.
+	     TODO: This may not be accurate for all implementations, but
+	     we do not model this in the cost tables.  */
+	  *cost += extra_cost->fp[mode == DFmode].addsub;
+	}
+      return false;
+
+    case UNSPEC:
+      /* The floating point round to integer frint* instructions.  */
+      if (aarch64_frint_unspec_p (XINT (x, 1)))
+        {
+          if (speed)
+            *cost += extra_cost->fp[mode == DFmode].roundint;
+
+          return false;
+        }
+
+      if (XINT (x, 1) == UNSPEC_RBIT)
+        {
+          if (speed)
+            *cost += extra_cost->alu.rev;
+
+          return false;
+        }
+      break;
+
+    case TRUNCATE:
+
+      /* Decompose <su>muldi3_highpart.  */
+      if (/* (truncate:DI  */
+	  mode == DImode
+	  /*   (lshiftrt:TI  */
+          && GET_MODE (XEXP (x, 0)) == TImode
+          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
+	  /*      (mult:TI  */
+          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+	  /*        (ANY_EXTEND:TI (reg:DI))
+	            (ANY_EXTEND:TI (reg:DI)))  */
+          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
+              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
+                  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
+          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
+          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
+	  /*     (const_int 64)  */
+          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+          && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
+        {
+          /* UMULH/SMULH.  */
+	  if (speed)
+	    *cost += extra_cost->mult[mode == DImode].extend;
+          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
+			     MULT, 0, speed);
+          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
+			     MULT, 1, speed);
+          return true;
+        }
+
+      /* Fall through.  */
     default:
       break;
     }
-  return false;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file,
+      "\nFailed to cost RTX.  Assuming default cost.\n");
+
+  return true;
 }
 
-static int
-aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
-		  enum machine_mode mode ATTRIBUTE_UNUSED,
-		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
+/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
+   calculated for X.  This cost is stored in *COST.  Returns true
+   if the total cost of X was calculated.  */
+static bool
+aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
+		   int param, int *cost, bool speed)
 {
-  enum rtx_code c  = GET_CODE (x);
-  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
+  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
 
-  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
-    return addr_cost->pre_modify;
-
-  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
-    return addr_cost->post_modify;
-
-  if (c == PLUS)
+  if (dump_file && (dump_flags & TDF_DETAILS))
     {
-      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
-	return addr_cost->imm_offset;
-      else if (GET_CODE (XEXP (x, 0)) == MULT
-	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
-	return addr_cost->register_extend;
-
-      return addr_cost->register_offset;
+      print_rtl_single (dump_file, x);
+      fprintf (dump_file, "\n%s cost: %d (%s)\n",
+	       speed ? "Hot" : "Cold",
+	       *cost, result ? "final" : "partial");
     }
-  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
-    return addr_cost->imm_offset;
 
-  return 0;
+  return result;
 }
 
 static int
@@ -4862,6 +6201,13 @@
   const struct cpu_regmove_cost *regmove_cost
     = aarch64_tune_params->regmove_cost;
 
+  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
+  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
+    to = GENERAL_REGS;
+
+  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
+    from = GENERAL_REGS;
+
   /* Moving between GPR and stack cost is the same as GP2GP.  */
   if ((from == GENERAL_REGS && to == STACK_REG)
       || (to == GENERAL_REGS && from == STACK_REG))
@@ -4884,7 +6230,7 @@
      secondary reload.  A general register is used as a scratch to move
      the upper DI value and the lower DI value is moved directly,
      hence the cost is the sum of three moves. */
-  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
+  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
 
   return regmove_cost->FP2FP;
@@ -4905,6 +6251,14 @@
   return aarch64_tune_params->issue_rate;
 }
 
+static int
+aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
+{
+  int issue_rate = aarch64_sched_issue_rate ();
+
+  return issue_rate > 1 ? issue_rate : 0;
+}
+
 /* Vectorizer cost model target hooks.  */
 
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
@@ -5257,6 +6611,7 @@
   aarch64_tune_flags = selected_tune->flags;
   aarch64_tune = selected_tune->core;
   aarch64_tune_params = selected_tune->tune;
+  aarch64_architecture_version = selected_cpu->architecture_version;
 
   if (aarch64_fix_a53_err835769 == 2)
     {
@@ -5425,8 +6780,8 @@
 	  /* Same reasoning as the tiny code model, but the offset cap here is
 	     4G.  */
 	  if (SYMBOL_REF_WEAK (x)
-	      || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
-	      || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
+	      || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
+			    HOST_WIDE_INT_C (4294967264)))
 	    return SYMBOL_FORCE_TO_MEM;
 	  return SYMBOL_SMALL_ABSOLUTE;
 
@@ -6015,7 +7370,7 @@
 
   /* We don't save the size into *PRETEND_SIZE because we want to avoid
      any complication of having crtl->args.pretend_args_size changed.  */
-  cfun->machine->saved_varargs_size
+  cfun->machine->frame.saved_varargs_size
     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
 		      STACK_BOUNDARY / BITS_PER_UNIT)
        + vr_saved * UNITS_PER_VREG);
@@ -6702,7 +8057,7 @@
       unsigned HOST_WIDE_INT elpart;
       unsigned int part, parts;
 
-      if (GET_CODE (el) == CONST_INT)
+      if (CONST_INT_P (el))
         {
           elpart = INTVAL (el);
           parts = 1;
@@ -6833,30 +8188,6 @@
 #undef CHECK
 }
 
-static bool
-aarch64_const_vec_all_same_int_p (rtx x,
-				  HOST_WIDE_INT minval,
-				  HOST_WIDE_INT maxval)
-{
-  HOST_WIDE_INT firstval;
-  int count, i;
-
-  if (GET_CODE (x) != CONST_VECTOR
-      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
-    return false;
-
-  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
-  if (firstval < minval || firstval > maxval)
-    return false;
-
-  count = CONST_VECTOR_NUNITS (x);
-  for (i = 1; i < count; i++)
-    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
-      return false;
-
-  return true;
-}
-
 /* Check of immediate shift constants are within range.  */
 bool
 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
@@ -6863,9 +8194,9 @@
 {
   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
   if (left)
-    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
+    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
   else
-    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
+    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
 }
 
 /* Return true if X is a uniform vector where all elements
@@ -6903,7 +8234,7 @@
       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
     return true;
 
-  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
+  if (CONST_INT_P (x))
     return true;
 
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
@@ -6940,17 +8271,43 @@
   return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
 }
 
-/* Construct and return a PARALLEL RTX vector.  */
+/* Construct and return a PARALLEL RTX vector with elements numbering the
+   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
+   the vector - from the perspective of the architecture.  This does not
+   line up with GCC's perspective on lane numbers, so we end up with
+   different masks depending on our target endian-ness.  The diagram
+   below may help.  We must draw the distinction when building masks
+   which select one half of the vector.  An instruction selecting
+   architectural low-lanes for a big-endian target, must be described using
+   a mask selecting GCC high-lanes.
+
+                 Big-Endian             Little-Endian
+
+GCC             0   1   2   3           3   2   1   0
+              | x | x | x | x |       | x | x | x | x |
+Architecture    3   2   1   0           3   2   1   0
+
+Low Mask:         { 2, 3 }                { 0, 1 }
+High Mask:        { 0, 1 }                { 2, 3 }
+*/
+
 rtx
 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
 {
   int nunits = GET_MODE_NUNITS (mode);
   rtvec v = rtvec_alloc (nunits / 2);
-  int base = high ? nunits / 2 : 0;
+  int high_base = nunits / 2;
+  int low_base = 0;
+  int base;
   rtx t1;
   int i;
 
-  for (i=0; i < nunits / 2; i++)
+  if (BYTES_BIG_ENDIAN)
+    base = high ? low_base : high_base;
+  else
+    base = high ? high_base : low_base;
+
+  for (i = 0; i < nunits / 2; i++)
     RTVEC_ELT (v, i) = GEN_INT (base + i);
 
   t1 = gen_rtx_PARALLEL (mode, v);
@@ -6957,6 +8314,38 @@
   return t1;
 }
 
+/* Check OP for validity as a PARALLEL RTX vector with elements
+   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
+   from the perspective of the architecture.  See the diagram above
+   aarch64_simd_vect_par_cnst_half for more details.  */
+
+bool
+aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
+				       bool high)
+{
+  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
+  HOST_WIDE_INT count_op = XVECLEN (op, 0);
+  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
+  int i = 0;
+
+  if (!VECTOR_MODE_P (mode))
+    return false;
+
+  if (count_op != count_ideal)
+    return false;
+
+  for (i = 0; i < count_ideal; i++)
+    {
+      rtx elt_op = XVECEXP (op, 0, i);
+      rtx elt_ideal = XVECEXP (ideal, 0, i);
+
+      if (!CONST_INT_P (elt_op)
+	  || INTVAL (elt_ideal) != INTVAL (elt_op))
+	return false;
+    }
+  return true;
+}
+
 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
    HIGH (exclusive).  */
 void
@@ -6963,7 +8352,7 @@
 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
 {
   HOST_WIDE_INT lane;
-  gcc_assert (GET_CODE (operand) == CONST_INT);
+  gcc_assert (CONST_INT_P (operand));
   lane = INTVAL (operand);
 
   if (lane < low || lane >= high)
@@ -6970,16 +8359,6 @@
     error ("lane out of range");
 }
 
-void
-aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
-{
-  gcc_assert (GET_CODE (operand) == CONST_INT);
-  HOST_WIDE_INT lane = INTVAL (operand);
-
-  if (lane < low || lane >= high)
-    error ("constant out of range");
-}
-
 /* Emit code to reinterpret one AdvSIMD type as another,
    without altering bits.  */
 void
@@ -7011,7 +8390,7 @@
 aarch64_simd_mem_operand_p (rtx op)
 {
   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
-			|| GET_CODE (XEXP (op, 0)) == REG);
+			|| REG_P (XEXP (op, 0)));
 }
 
 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
@@ -7664,6 +9043,9 @@
   if (!CONST_DOUBLE_P (x))
     return false;
 
+  if (GET_MODE (x) == VOIDmode)
+    return false;
+
   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
 
   /* We cannot represent infinities, NaNs or +/-zero.  We won't
@@ -7916,20 +9298,26 @@
 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
 {
   enum machine_mode vmode = GET_MODE (target);
-  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
+  unsigned int nelt = GET_MODE_NUNITS (vmode);
   bool one_vector_p = rtx_equal_p (op0, op1);
-  rtx rmask[MAX_VECT_LEN], mask;
+  rtx mask;
 
-  gcc_checking_assert (!BYTES_BIG_ENDIAN);
-
   /* The TBL instruction does not use a modulo index, so we must take care
      of that ourselves.  */
-  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
-  for (i = 0; i < nelt; ++i)
-    rmask[i] = mask;
-  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
+  mask = aarch64_simd_gen_const_vector_dup (vmode,
+      one_vector_p ? nelt - 1 : 2 * nelt - 1);
   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
 
+  /* For big-endian, we also need to reverse the index within the vector
+     (but not which vector).  */
+  if (BYTES_BIG_ENDIAN)
+    {
+      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
+      if (!one_vector_p)
+        mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
+      sel = expand_simple_binop (vmode, XOR, sel, mask,
+				 NULL, 0, OPTAB_LIB_WIDEN);
+    }
   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
 }
 
@@ -8188,7 +9576,145 @@
   return true;
 }
 
+/* Recognize patterns for the EXT insn.  */
+
 static bool
+aarch64_evpc_ext (struct expand_vec_perm_d *d)
+{
+  unsigned int i, nelt = d->nelt;
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+  rtx offset;
+
+  unsigned int location = d->perm[0]; /* Always < nelt.  */
+
+  /* Check if the extracted indices are increasing by one.  */
+  for (i = 1; i < nelt; i++)
+    {
+      unsigned int required = location + i;
+      if (d->one_vector_p)
+        {
+          /* We'll pass the same vector in twice, so allow indices to wrap.  */
+	  required &= (nelt - 1);
+	}
+      if (d->perm[i] != required)
+        return false;
+    }
+
+  switch (d->vmode)
+    {
+    case V16QImode: gen = gen_aarch64_extv16qi; break;
+    case V8QImode: gen = gen_aarch64_extv8qi; break;
+    case V4HImode: gen = gen_aarch64_extv4hi; break;
+    case V8HImode: gen = gen_aarch64_extv8hi; break;
+    case V2SImode: gen = gen_aarch64_extv2si; break;
+    case V4SImode: gen = gen_aarch64_extv4si; break;
+    case V2SFmode: gen = gen_aarch64_extv2sf; break;
+    case V4SFmode: gen = gen_aarch64_extv4sf; break;
+    case V2DImode: gen = gen_aarch64_extv2di; break;
+    case V2DFmode: gen = gen_aarch64_extv2df; break;
+    default:
+      return false;
+    }
+
+  /* Success! */
+  if (d->testing_p)
+    return true;
+
+  /* The case where (location == 0) is a no-op for both big- and little-endian,
+     and is removed by the mid-end at optimization levels -O1 and higher.  */
+
+  if (BYTES_BIG_ENDIAN && (location != 0))
+    {
+      /* After setup, we want the high elements of the first vector (stored
+         at the LSB end of the register), and the low elements of the second
+         vector (stored at the MSB end of the register). So swap.  */
+      rtx temp = d->op0;
+      d->op0 = d->op1;
+      d->op1 = temp;
+      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
+      location = nelt - location;
+    }
+
+  offset = GEN_INT (location);
+  emit_insn (gen (d->target, d->op0, d->op1, offset));
+  return true;
+}
+
+/* Recognize patterns for the REV insns.  */
+
+static bool
+aarch64_evpc_rev (struct expand_vec_perm_d *d)
+{
+  unsigned int i, j, diff, nelt = d->nelt;
+  rtx (*gen) (rtx, rtx);
+
+  if (!d->one_vector_p)
+    return false;
+
+  diff = d->perm[0];
+  switch (diff)
+    {
+    case 7:
+      switch (d->vmode)
+	{
+	case V16QImode: gen = gen_aarch64_rev64v16qi; break;
+	case V8QImode: gen = gen_aarch64_rev64v8qi;  break;
+	default:
+	  return false;
+	}
+      break;
+    case 3:
+      switch (d->vmode)
+	{
+	case V16QImode: gen = gen_aarch64_rev32v16qi; break;
+	case V8QImode: gen = gen_aarch64_rev32v8qi;  break;
+	case V8HImode: gen = gen_aarch64_rev64v8hi;  break;
+	case V4HImode: gen = gen_aarch64_rev64v4hi;  break;
+	default:
+	  return false;
+	}
+      break;
+    case 1:
+      switch (d->vmode)
+	{
+	case V16QImode: gen = gen_aarch64_rev16v16qi; break;
+	case V8QImode: gen = gen_aarch64_rev16v8qi;  break;
+	case V8HImode: gen = gen_aarch64_rev32v8hi;  break;
+	case V4HImode: gen = gen_aarch64_rev32v4hi;  break;
+	case V4SImode: gen = gen_aarch64_rev64v4si;  break;
+	case V2SImode: gen = gen_aarch64_rev64v2si;  break;
+	case V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
+	case V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
+	default:
+	  return false;
+	}
+      break;
+    default:
+      return false;
+    }
+
+  for (i = 0; i < nelt ; i += diff + 1)
+    for (j = 0; j <= diff; j += 1)
+      {
+	/* This is guaranteed to be true as the value of diff
+	   is 7, 3, 1 and we should have enough elements in the
+	   queue to generate this.  Getting a vector mask with a
+	   value of diff other than these values implies that
+	   something is wrong by the time we get here.  */
+	gcc_assert (i + j < nelt);
+	if (d->perm[i + j] != i + diff - j)
+	  return false;
+      }
+
+  /* Success! */
+  if (d->testing_p)
+    return true;
+
+  emit_insn (gen (d->target, d->op0));
+  return true;
+}
+
+static bool
 aarch64_evpc_dup (struct expand_vec_perm_d *d)
 {
   rtx (*gen) (rtx, rtx, rtx);
@@ -8198,10 +9724,6 @@
   unsigned int i, elt, nelt = d->nelt;
   rtx lane;
 
-  /* TODO: This may not be big-endian safe.  */
-  if (BYTES_BIG_ENDIAN)
-    return false;
-
   elt = d->perm[0];
   for (i = 1; i < nelt; i++)
     {
@@ -8215,7 +9737,7 @@
      use d->op0 and need not do any extra arithmetic to get the
      correct lane number.  */
   in0 = d->op0;
-  lane = GEN_INT (elt);
+  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */
 
   switch (vmode)
     {
@@ -8244,11 +9766,6 @@
   enum machine_mode vmode = d->vmode;
   unsigned int i, nelt = d->nelt;
 
-  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
-     numbering of elements for big-endian, we must reverse the order.  */
-  if (BYTES_BIG_ENDIAN)
-    return false;
-
   if (d->testing_p)
     return true;
 
@@ -8259,7 +9776,15 @@
     return false;
 
   for (i = 0; i < nelt; ++i)
-    rperm[i] = GEN_INT (d->perm[i]);
+    {
+      int nunits = GET_MODE_NUNITS (vmode);
+
+      /* If big-endian and two vectors we end up with a weird mixed-endian
+	 mode on NEON.  Reverse the index within each word but not the word
+	 itself.  */
+      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
+					   : d->perm[i]);
+    }
   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
   sel = force_reg (vmode, sel);
 
@@ -8288,14 +9813,18 @@
 
   if (TARGET_SIMD)
     {
-      if (aarch64_evpc_zip (d))
+      if (aarch64_evpc_rev (d))
 	return true;
+      else if (aarch64_evpc_ext (d))
+	return true;
+      else if (aarch64_evpc_dup (d))
+	return true;
+      else if (aarch64_evpc_zip (d))
+	return true;
       else if (aarch64_evpc_uzp (d))
 	return true;
       else if (aarch64_evpc_trn (d))
 	return true;
-      else if (aarch64_evpc_dup (d))
-	return true;
       return aarch64_evpc_tbl (d);
     }
   return false;
@@ -8414,7 +9943,8 @@
   /* Limited combinations of subregs are safe on FPREGs.  Particularly,
      1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
      2. Scalar to Scalar for integer modes or same size float modes.
-     3. Vector to Vector modes.  */
+     3. Vector to Vector modes.
+     4. On little-endian only, Vector-Structure to Vector modes.  */
   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
     {
       if (aarch64_vector_mode_supported_p (from)
@@ -8430,11 +9960,369 @@
       if (aarch64_vector_mode_supported_p (from)
 	  && aarch64_vector_mode_supported_p (to))
 	return false;
+
+      /* Within an vector structure straddling multiple vector registers
+	 we are in a mixed-endian representation.  As such, we can't
+	 easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
+	 switch between vectors and vector structures cheaply.  */
+      if (!BYTES_BIG_ENDIAN)
+	if ((aarch64_vector_mode_supported_p (from)
+	      && aarch64_vect_struct_mode_p (to))
+	    || (aarch64_vector_mode_supported_p (to)
+	      && aarch64_vect_struct_mode_p (from)))
+	  return false;
     }
 
   return true;
 }
 
+/* Implement MODES_TIEABLE_P.  */
+
+bool
+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
+    return true;
+
+  /* We specifically want to allow elements of "structure" modes to
+     be tieable to the structure.  This more general condition allows
+     other rarer situations too.  */
+  if (TARGET_SIMD
+      && aarch64_vector_mode_p (mode1)
+      && aarch64_vector_mode_p (mode2))
+    return true;
+
+  return false;
+}
+
+/* Return a new RTX holding the result of moving POINTER forward by
+   AMOUNT bytes.  */
+
+static rtx
+aarch64_move_pointer (rtx pointer, int amount)
+{
+  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
+
+  return adjust_automodify_address (pointer, GET_MODE (pointer),
+				    next, amount);
+}
+
+/* Return a new RTX holding the result of moving POINTER forward by the
+   size of the mode it points to.  */
+
+static rtx
+aarch64_progress_pointer (rtx pointer)
+{
+  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
+
+  return aarch64_move_pointer (pointer, amount);
+}
+
+/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
+   MODE bytes.  */
+
+static void
+aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
+					      enum machine_mode mode)
+{
+  rtx reg = gen_reg_rtx (mode);
+
+  /* "Cast" the pointers to the correct mode.  */
+  *src = adjust_address (*src, mode, 0);
+  *dst = adjust_address (*dst, mode, 0);
+  /* Emit the memcpy.  */
+  emit_move_insn (reg, *src);
+  emit_move_insn (*dst, reg);
+  /* Move the pointers forward.  */
+  *src = aarch64_progress_pointer (*src);
+  *dst = aarch64_progress_pointer (*dst);
+}
+
+/* Expand movmem, as if from a __builtin_memcpy.  Return true if
+   we succeed, otherwise return false.  */
+
+bool
+aarch64_expand_movmem (rtx *operands)
+{
+  unsigned int n;
+  rtx dst = operands[0];
+  rtx src = operands[1];
+  rtx base;
+  bool speed_p = !optimize_function_for_size_p (cfun);
+
+  /* When optimizing for size, give a better estimate of the length of a
+     memcpy call, but use the default otherwise.  */
+  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
+
+  /* We can't do anything smart if the amount to copy is not constant.  */
+  if (!CONST_INT_P (operands[2]))
+    return false;
+
+  n = UINTVAL (operands[2]);
+
+  /* Try to keep the number of instructions low.  For cases below 16 bytes we
+     need to make at most two moves.  For cases above 16 bytes it will be one
+     move for each 16 byte chunk, then at most two additional moves.  */
+  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
+    return false;
+
+  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+  dst = adjust_automodify_address (dst, VOIDmode, base, 0);
+
+  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
+  src = adjust_automodify_address (src, VOIDmode, base, 0);
+
+  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
+     1-byte chunk.  */
+  if (n < 4)
+    {
+      if (n >= 2)
+	{
+	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
+	  n -= 2;
+	}
+
+      if (n == 1)
+	aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
+
+      return true;
+    }
+
+  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
+     4-byte chunk, partially overlapping with the previously copied chunk.  */
+  if (n < 8)
+    {
+      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
+      n -= 4;
+      if (n > 0)
+	{
+	  int move = n - 4;
+
+	  src = aarch64_move_pointer (src, move);
+	  dst = aarch64_move_pointer (dst, move);
+	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
+	}
+      return true;
+    }
+
+  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
+     them, then (if applicable) an 8-byte chunk.  */
+  while (n >= 8)
+    {
+      if (n / 16)
+	{
+	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
+	  n -= 16;
+	}
+      else
+	{
+	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
+	  n -= 8;
+	}
+    }
+
+  /* Finish the final bytes of the copy.  We can always do this in one
+     instruction.  We either copy the exact amount we need, or partially
+     overlap with the previous chunk we copied and copy 8-bytes.  */
+  if (n == 0)
+    return true;
+  else if (n == 1)
+    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
+  else if (n == 2)
+    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
+  else if (n == 4)
+    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
+  else
+    {
+      if (n == 3)
+	{
+	  src = aarch64_move_pointer (src, -1);
+	  dst = aarch64_move_pointer (dst, -1);
+	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
+	}
+      else
+	{
+	  int move = n - 8;
+
+	  src = aarch64_move_pointer (src, move);
+	  dst = aarch64_move_pointer (dst, move);
+	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
+	}
+    }
+
+  return true;
+}
+
+static bool
+aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+					unsigned int align,
+					enum by_pieces_operation op,
+					bool speed_p)
+{
+  /* STORE_BY_PIECES can be used when copying a constant string, but
+     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
+     For now we always fail this and let the move_by_pieces code copy
+     the string from read-only memory.  */
+  if (op == STORE_BY_PIECES)
+    return false;
+
+  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+aarch64_macro_fusion_p (void)
+{
+  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
+}
+
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+aarch_macro_fusion_pair_p (rtx prev, rtx curr)
+{
+  rtx set_dest;
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!aarch64_macro_fusion_p ())
+    return false;
+
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
+    {
+      /* We are trying to match:
+         prev (mov)  == (set (reg r0) (const_int imm16))
+         curr (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 16))
+                             (const_int imm16_1))  */
+
+      set_dest = SET_DEST (curr_set);
+
+      if (GET_CODE (set_dest) == ZERO_EXTRACT
+          && CONST_INT_P (SET_SRC (curr_set))
+          && CONST_INT_P (SET_SRC (prev_set))
+          && CONST_INT_P (XEXP (set_dest, 2))
+          && INTVAL (XEXP (set_dest, 2)) == 16
+          && REG_P (XEXP (set_dest, 0))
+          && REG_P (SET_DEST (prev_set))
+          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+        {
+          return true;
+        }
+    }
+
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
+    {
+
+      /*  We're trying to match:
+          prev (adrp) == (set (reg r1)
+                              (high (symbol_ref ("SYM"))))
+          curr (add) == (set (reg r0)
+                             (lo_sum (reg r1)
+                                     (symbol_ref ("SYM"))))
+          Note that r0 need not necessarily be the same as r1, especially
+          during pre-regalloc scheduling.  */
+
+      if (satisfies_constraint_Ush (SET_SRC (prev_set))
+          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+        {
+          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+              && REG_P (XEXP (SET_SRC (curr_set), 0))
+              && REGNO (XEXP (SET_SRC (curr_set), 0))
+                 == REGNO (SET_DEST (prev_set))
+              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
+                              XEXP (SET_SRC (curr_set), 1)))
+            return true;
+        }
+    }
+
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
+    {
+
+      /* We're trying to match:
+         prev (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 32))
+                             (const_int imm16_1))
+         curr (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 48))
+                             (const_int imm16_2))  */
+
+      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
+          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
+          && REG_P (XEXP (SET_DEST (prev_set), 0))
+          && REG_P (XEXP (SET_DEST (curr_set), 0))
+          && REGNO (XEXP (SET_DEST (prev_set), 0))
+             == REGNO (XEXP (SET_DEST (curr_set), 0))
+          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
+          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
+          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
+          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
+          && CONST_INT_P (SET_SRC (prev_set))
+          && CONST_INT_P (SET_SRC (curr_set)))
+        return true;
+
+    }
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
+    {
+      /* We're trying to match:
+          prev (adrp) == (set (reg r0)
+                              (high (symbol_ref ("SYM"))))
+          curr (ldr) == (set (reg r1)
+                             (mem (lo_sum (reg r0)
+                                             (symbol_ref ("SYM")))))
+                 or
+          curr (ldr) == (set (reg r1)
+                             (zero_extend (mem
+                                           (lo_sum (reg r0)
+                                                   (symbol_ref ("SYM"))))))  */
+      if (satisfies_constraint_Ush (SET_SRC (prev_set))
+          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+        {
+          rtx curr_src = SET_SRC (curr_set);
+
+          if (GET_CODE (curr_src) == ZERO_EXTEND)
+            curr_src = XEXP (curr_src, 0);
+
+          if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
+              && REG_P (XEXP (XEXP (curr_src, 0), 0))
+              && REGNO (XEXP (XEXP (curr_src, 0), 0))
+                 == REGNO (SET_DEST (prev_set))
+              && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
+                              XEXP (SET_SRC (prev_set), 0)))
+              return true;
+        }
+    }
+
+  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
+      && any_condjump_p (curr))
+    {
+      enum attr_type prev_type = get_attr_type (prev);
+
+      /* FIXME: this misses some which is considered simple arthematic
+         instructions for ThunderX.  Simple shifts are missed here.  */
+      if (prev_type == TYPE_ALUS_REG
+          || prev_type == TYPE_ALUS_IMM
+          || prev_type == TYPE_LOGICS_REG
+          || prev_type == TYPE_LOGICS_IMM)
+        return true;
+    }
+
+  return false;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -8561,6 +10449,9 @@
 #undef TARGET_MEMORY_MOVE_COST
 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
 
+#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
+#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
+
 #undef TARGET_MUST_PASS_IN_STACK
 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
 
@@ -8583,6 +10474,9 @@
 #undef TARGET_PREFERRED_RELOAD_CLASS
 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
 
+#undef TARGET_SCHED_REASSOCIATION_WIDTH
+#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
+
 #undef TARGET_SECONDARY_RELOAD
 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
 
@@ -8605,11 +10499,15 @@
 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
 
 #undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS aarch64_rtx_costs
+#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
 
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
 
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+  aarch64_sched_first_cycle_multipass_dfa_lookahead
+
 #undef TARGET_TRAMPOLINE_INIT
 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
 
@@ -8643,6 +10541,10 @@
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   aarch64_autovectorize_vector_sizes
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
+  aarch64_atomic_assign_expand_fenv
+
 /* Section anchor support.  */
 
 #undef TARGET_MIN_ANCHOR_OFFSET
@@ -8674,6 +10576,25 @@
 #undef TARGET_RELAXED_ORDERING
 #define TARGET_RELAXED_ORDERING true
 
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM CC_REGNUM
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
+
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+  aarch64_use_by_pieces_infrastructure_p
+
+#undef TARGET_CAN_USE_DOLOOP_P
+#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
+
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"
--- a/src/gcc/config/aarch64/aarch64-elf-raw.h
+++ b/src/gcc/config/aarch64/aarch64-elf-raw.h
@@ -23,7 +23,9 @@
 #define GCC_AARCH64_ELF_RAW_H
 
 #define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s"
-#define ENDFILE_SPEC " crtend%O%s crtn%O%s"
+#define ENDFILE_SPEC \
+  " crtend%O%s crtn%O%s " \
+  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
 
 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
 #define CA53_ERR_835769_SPEC \
--- a/src/gcc/config/aarch64/aarch64-linux.h
+++ b/src/gcc/config/aarch64/aarch64-linux.h
@@ -21,7 +21,7 @@
 #ifndef GCC_AARCH64_LINUX_H
 #define GCC_AARCH64_LINUX_H
 
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1"
 
 #define CPP_SPEC "%{pthread:-D_REENTRANT}"
 
@@ -34,7 +34,7 @@
      %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \
    -X						\
    %{mbig-endian:-EB} %{mlittle-endian:-EL}     \
-   -maarch64linux%{mbig-endian:b}"
+   -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}"
 
 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
 #define CA53_ERR_835769_SPEC \
@@ -56,6 +56,14 @@
                   CA53_ERR_835769_SPEC \
                   CA53_ERR_843419_SPEC
 
+#define GNU_USER_TARGET_MATHFILE_SPEC \
+  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC   \
+  GNU_USER_TARGET_MATHFILE_SPEC " " \
+  GNU_USER_TARGET_ENDFILE_SPEC
+
 #define TARGET_OS_CPP_BUILTINS()		\
   do						\
     {						\
--- a/src/gcc/config/aarch64/iterators.md
+++ b/src/gcc/config/aarch64/iterators.md
@@ -95,6 +95,9 @@
 ;; Vector Float modes.
 (define_mode_iterator VDQF [V2SF V4SF V2DF])
 
+;; Vector Float modes, and DF.
+(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
+
 ;; Vector single Float modes.
 (define_mode_iterator VDQSF [V2SF V4SF])
 
@@ -156,6 +159,9 @@
 ;; Vector modes for H and S types.
 (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])
 
+;; Vector modes for H, S and D types.
+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
+
 ;; Vector modes for Q, H and S types.
 (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
 
@@ -177,6 +183,9 @@
 ;; All byte modes.
 (define_mode_iterator VB [V8QI V16QI])
 
+;; 2 and 4 lane SI modes.
+(define_mode_iterator VS [V2SI V4SI])
+
 (define_mode_iterator TX [TI TF])
 
 ;; Opaque structure modes.
@@ -207,8 +216,7 @@
     UNSPEC_FMINNMV	; Used in aarch64-simd.md.
     UNSPEC_FMINV	; Used in aarch64-simd.md.
     UNSPEC_FADDV	; Used in aarch64-simd.md.
-    UNSPEC_SADDV	; Used in aarch64-simd.md.
-    UNSPEC_UADDV	; Used in aarch64-simd.md.
+    UNSPEC_ADDV		; Used in aarch64-simd.md.
     UNSPEC_SMAXV	; Used in aarch64-simd.md.
     UNSPEC_SMINV	; Used in aarch64-simd.md.
     UNSPEC_UMAXV	; Used in aarch64-simd.md.
@@ -273,6 +281,10 @@
     UNSPEC_UZP2		; Used in vector permute patterns.
     UNSPEC_TRN1		; Used in vector permute patterns.
     UNSPEC_TRN2		; Used in vector permute patterns.
+    UNSPEC_EXT		; Used in aarch64-simd.md.
+    UNSPEC_REV64	; Used in vector reverse patterns (permute).
+    UNSPEC_REV32	; Used in vector reverse patterns (permute).
+    UNSPEC_REV16	; Used in vector reverse patterns (permute).
     UNSPEC_AESE		; Used in aarch64-simd.md.
     UNSPEC_AESD         ; Used in aarch64-simd.md.
     UNSPEC_AESMC        ; Used in aarch64-simd.md.
@@ -299,6 +311,10 @@
 ;; 32-bit version and "%x0" in the 64-bit version.
 (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
 
+;; For inequal width int to float conversion
+(define_mode_attr w1 [(SF "w") (DF "x")])
+(define_mode_attr w2 [(SF "x") (DF "w")])
+
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
 
@@ -361,6 +377,9 @@
                          (V2DI "2d") (V2SF "2s")
 			 (V4SF "4s") (V2DF "2d")])
 
+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
+                            (V4SI "32") (V2DI "64")])
+
 (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
 			 (V4HI ".4h") (V8HI  ".8h")
 			 (V2SI ".2s") (V4SI  ".4s")
@@ -393,7 +412,8 @@
 			  (V2SI "8b") (V4SI  "16b")
 			  (V2DI "16b") (V2SF  "8b")
 			  (V4SF "16b") (V2DF  "16b")
-			  (DI   "8b")  (DF    "8b")])
+			  (DI   "8b")  (DF    "8b")
+			  (SI   "8b")])
 
 ;; Define element mode for each vector mode.
 (define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
@@ -524,6 +544,14 @@
 				(V2DF "v2di") (DF    "di")
 				(SF   "si")])
 
+;; Lower case element modes (as used in shift immediate patterns).
+(define_mode_attr ve_mode [(V8QI "qi") (V16QI "qi")
+			   (V4HI "hi") (V8HI  "hi")
+			   (V2SI "si") (V4SI  "si")
+			   (DI   "di") (V2DI  "di")
+			   (QI   "qi") (HI    "hi")
+			   (SI   "si")])
+
 ;; Vm for lane instructions is restricted to FP_LO_REGS.
 (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
 		       (V2SI "w") (V4SI "w") (SI "w")])
@@ -555,13 +583,43 @@
 
 (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")])
 
+;; Mode of pair of elements for each vector mode, to define transfer
+;; size for structure lane/dup loads and stores.
+(define_mode_attr V_TWO_ELEM [(V8QI "HI")   (V16QI "HI")
+                              (V4HI "SI")   (V8HI "SI")
+                              (V2SI "V2SI") (V4SI "V2SI")
+                              (DI "V2DI")   (V2DI "V2DI")
+                              (V2SF "V2SF") (V4SF "V2SF")
+                              (DF "V2DI")   (V2DF "V2DI")])
+
+;; Similar, for three elements.
+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK")
+                                (V4HI "BLK") (V8HI "BLK")
+                                (V2SI "BLK") (V4SI "BLK")
+                                (DI "EI")    (V2DI "EI")
+                                (V2SF "BLK") (V4SF "BLK")
+                                (DF "EI")    (V2DF "EI")])
+
+;; Similar, for four elements.
+(define_mode_attr V_FOUR_ELEM [(V8QI "SI")   (V16QI "SI")
+                               (V4HI "V4HI") (V8HI "V4HI")
+                               (V2SI "V4SI") (V4SI "V4SI")
+                               (DI "OI")     (V2DI "OI")
+                               (V2SF "V4SF") (V4SF "V4SF")
+                               (DF "OI")     (V2DF "OI")])
+
+
 ;; Mode for atomic operation suffixes
 (define_mode_attr atomic_sfx
   [(QI "b") (HI "h") (SI "") (DI "")])
 
-(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")])
-(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")])
+(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (SF "si") (DF "di")])
+(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (SF "SI") (DF "DI")])
 
+;; for the inequal width integer to fp conversions
+(define_mode_attr fcvt_iesize [(SF "di") (DF "si")])
+(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")])
+
 (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
 				(V4HI "V8HI") (V8HI  "V4HI")
 				(V2SI "V4SI") (V4SI  "V2SI")
@@ -616,6 +674,9 @@
 		      (V2DI  "p") (V2DF  "p")
 		      (V2SF "p") (V4SF  "v")])
 
+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
+(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
@@ -629,6 +690,9 @@
 ;; Code iterator for logical operations
 (define_code_iterator LOGICAL [and ior xor])
 
+;; Code iterator for logical operations whose :nlogical works on SIMD registers.
+(define_code_iterator NLOGICAL [and ior])
+
 ;; Code iterator for sign/zero extension
 (define_code_iterator ANY_EXTEND [sign_extend zero_extend])
 
@@ -807,8 +871,6 @@
 (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
 			       UNSPEC_FMAXNMV UNSPEC_FMINNMV])
 
-(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
-
 (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
 			      UNSPEC_SRHADD UNSPEC_URHADD
 			      UNSPEC_SHSUB UNSPEC_UHSUB
@@ -856,6 +918,8 @@
 			      UNSPEC_TRN1 UNSPEC_TRN2
 			      UNSPEC_UZP1 UNSPEC_UZP2])
 
+(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16])
+
 (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
 			     UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
 			     UNSPEC_FRINTA])
@@ -865,6 +929,10 @@
 
 (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
 
+(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
+                          UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH
+                          UNSPEC_CRC32CW UNSPEC_CRC32CX])
+
 (define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD])
 (define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC])
 
@@ -907,7 +975,6 @@
 		      (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
 		      (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
 		      (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
-		      (UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
 		      (UNSPEC_SSLI  "s") (UNSPEC_USLI  "u")
 		      (UNSPEC_SSRI  "s") (UNSPEC_USRI  "u")
 		      (UNSPEC_USRA  "u") (UNSPEC_SSRA  "s")
@@ -957,8 +1024,9 @@
 			 (UNSPEC_RADDHN2 "add")
 			 (UNSPEC_RSUBHN2 "sub")])
 
-(define_int_attr offsetlr [(UNSPEC_SSLI	"1") (UNSPEC_USLI "1")
-			   (UNSPEC_SSRI	"0") (UNSPEC_USRI "0")])
+(define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "")
+			   (UNSPEC_SSRI "offset_")
+			   (UNSPEC_USRI "offset_")])
 
 ;; Standard pattern names for floating-point rounding instructions.
 (define_int_attr frint_pattern [(UNSPEC_FRINTZ "btrunc")
@@ -983,6 +1051,10 @@
 			    (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
 			    (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
 
+; op code for REV instructions (size within which elements are reversed).
+(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32")
+			 (UNSPEC_REV16 "16")])
+
 (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
 			    (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
 			    (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
@@ -989,6 +1061,16 @@
 
 (define_int_attr frecp_suffix  [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
 
+(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h")
+                        (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x")
+                        (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch")
+                        (UNSPEC_CRC32CW "crc32cw") (UNSPEC_CRC32CX "crc32cx")])
+
+(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI")
+                        (UNSPEC_CRC32W "SI") (UNSPEC_CRC32X "DI")
+                        (UNSPEC_CRC32CB "QI") (UNSPEC_CRC32CH "HI")
+                        (UNSPEC_CRC32CW "SI") (UNSPEC_CRC32CX "DI")])
+
 (define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")])
 (define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")])
 
--- a/src/gcc/config/aarch64/aarch64.h
+++ b/src/gcc/config/aarch64/aarch64.h
@@ -26,14 +26,48 @@
 #define TARGET_CPU_CPP_BUILTINS()			\
   do							\
     {							\
-      builtin_define ("__aarch64__");			\
+      builtin_define ("__aarch64__");                   \
+      builtin_define ("__ARM_64BIT_STATE");             \
+      builtin_define_with_int_value                     \
+        ("__ARM_ARCH", aarch64_architecture_version);   \
+      cpp_define_formatted                                              \
+        (parse_in, "__ARM_ARCH_%dA", aarch64_architecture_version);     \
+      builtin_define ("__ARM_ARCH_ISA_A64");            \
+      builtin_define_with_int_value                     \
+        ("__ARM_ARCH_PROFILE", 'A');                    \
+      builtin_define ("__ARM_FEATURE_CLZ");             \
+      builtin_define ("__ARM_FEATURE_IDIV");            \
+      builtin_define ("__ARM_FEATURE_UNALIGNED");       \
+      if (flag_unsafe_math_optimizations)               \
+        builtin_define ("__ARM_FP_FAST");               \
+      builtin_define ("__ARM_PCS_AAPCS64");             \
+      builtin_define_with_int_value                     \
+        ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8);  \
+      builtin_define_with_int_value                     \
+        ("__ARM_SIZEOF_MINIMAL_ENUM",                   \
+         flag_short_enums? 1 : 4);                      \
       if (TARGET_BIG_END)				\
-	builtin_define ("__AARCH64EB__");		\
+        {                                               \
+          builtin_define ("__AARCH64EB__");             \
+          builtin_define ("__ARM_BIG_ENDIAN");          \
+        }                                               \
       else						\
 	builtin_define ("__AARCH64EL__");		\
 							\
-      if (TARGET_SIMD)					\
-	builtin_define ("__ARM_NEON");			\
+      if (TARGET_FLOAT)                                         \
+        {                                                       \
+          builtin_define ("__ARM_FEATURE_FMA");                 \
+          builtin_define_with_int_value ("__ARM_FP", 0x0C);     \
+        }                                                       \
+      if (TARGET_SIMD)                                          \
+        {                                                       \
+          builtin_define ("__ARM_FEATURE_NUMERIC_MAXMIN");      \
+          builtin_define ("__ARM_NEON");			\
+          builtin_define_with_int_value ("__ARM_NEON_FP", 0x0C);\
+        }                                                       \
+							        \
+      if (TARGET_CRC32)				        \
+	builtin_define ("__ARM_FEATURE_CRC32");		\
 							\
       switch (aarch64_cmodel)				\
 	{						\
@@ -155,6 +189,8 @@
 
 #define PCC_BITFIELD_TYPE_MATTERS	1
 
+/* Major revision number of the ARM Architecture implemented by the target.  */
+extern unsigned aarch64_architecture_version;
 
 /* Instruction tuning/selection flags.  */
 
@@ -188,6 +224,9 @@
 /* Crypto is an optional extension to AdvSIMD.  */
 #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
 
+/* CRC instructions that can be enabled through +crc arch extension.  */
+#define TARGET_CRC32 (AARCH64_ISA_CRC)
+
 /* Standard register usage.  */
 
 /* 31 64-bit general purpose registers R0-R30:
@@ -244,7 +283,7 @@
     1, 1, 1, 1,   1, 1, 1, 1,	/* R0 - R7 */		\
     1, 1, 1, 1,   1, 1, 1, 1,	/* R8 - R15 */		\
     1, 1, 1, 0,   0, 0, 0, 0,	/* R16 - R23 */		\
-    0, 0, 0, 0,   0, 1, 0, 1,	/* R24 - R30, SP */	\
+    0, 0, 0, 0,   0, 1, 1, 1,	/* R24 - R30, SP */	\
     1, 1, 1, 1,   1, 1, 1, 1,	/* V0 - V7 */		\
     0, 0, 0, 0,   0, 0, 0, 0,	/* V8 - V15 */		\
     1, 1, 1, 1,   1, 1, 1, 1,   /* V16 - V23 */         \
@@ -303,7 +342,7 @@
    considered live at the start of the called function.  */
 
 #define EPILOGUE_USES(REGNO) \
-  ((REGNO) == LR_REGNUM)
+  (epilogue_completed && (REGNO) == LR_REGNUM)
 
 /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
    the stack pointer does not matter.  The value is tested only in
@@ -365,8 +404,7 @@
 
 #define HARD_REGNO_MODE_OK(REGNO, MODE)	aarch64_hard_regno_mode_ok (REGNO, MODE)
 
-#define MODES_TIEABLE_P(MODE1, MODE2)			\
-  (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2)
 
 #define DWARF2_UNWIND_INFO 1
 
@@ -409,7 +447,7 @@
 enum reg_class
 {
   NO_REGS,
-  CORE_REGS,
+  CALLER_SAVE_REGS,
   GENERAL_REGS,
   STACK_REG,
   POINTER_REGS,
@@ -424,7 +462,7 @@
 #define REG_CLASS_NAMES				\
 {						\
   "NO_REGS",					\
-  "CORE_REGS",					\
+  "CALLER_SAVE_REGS",				\
   "GENERAL_REGS",				\
   "STACK_REG",					\
   "POINTER_REGS",				\
@@ -436,7 +474,7 @@
 #define REG_CLASS_CONTENTS						\
 {									\
   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\
-  { 0x7fffffff, 0x00000000, 0x00000003 },	/* CORE_REGS */		\
+  { 0x0007ffff, 0x00000000, 0x00000000 },	/* CALLER_SAVE_REGS */	\
   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\
   { 0x80000000, 0x00000000, 0x00000000 },	/* STACK_REG */		\
   { 0xffffffff, 0x00000000, 0x00000003 },	/* POINTER_REGS */	\
@@ -447,7 +485,7 @@
 
 #define REGNO_REG_CLASS(REGNO)	aarch64_regno_regclass (REGNO)
 
-#define INDEX_REG_CLASS	CORE_REGS
+#define INDEX_REG_CLASS	GENERAL_REGS
 #define BASE_REG_CLASS  POINTER_REGS
 
 /* Register pairs used to eliminate unneeded registers that point into
@@ -468,7 +506,7 @@
 
 enum target_cpus
 {
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \
   TARGET_CPU_##INTERNAL_IDENT,
 #include "aarch64-cores.def"
 #undef AARCH64_CORE
@@ -524,13 +562,33 @@
 struct GTY (()) aarch64_frame
 {
   HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
+
+  /* The number of extra stack bytes taken up by register varargs.
+     This area is allocated by the callee at the very top of the
+     frame.  This value is rounded up to a multiple of
+     STACK_BOUNDARY.  */
+  HOST_WIDE_INT saved_varargs_size;
+
   HOST_WIDE_INT saved_regs_size;
   /* Padding if needed after the all the callee save registers have
      been saved.  */
   HOST_WIDE_INT padding0;
   HOST_WIDE_INT hardfp_offset;	/* HARD_FRAME_POINTER_REGNUM */
-  HOST_WIDE_INT fp_lr_offset;	/* Space needed for saving fp and/or lr */
 
+  /* Offset from the base of the frame (incomming SP) to the
+     hard_frame_pointer.  This value is always a multiple of
+     STACK_BOUNDARY.  */
+  HOST_WIDE_INT hard_fp_offset;
+
+  /* The size of the frame.  This value is the offset from base of the
+   * frame (incomming SP) to the stack_pointer.  This value is always
+   * a multiple of STACK_BOUNDARY.  */
+
+  unsigned wb_candidate1;
+  unsigned wb_candidate2;
+
+  HOST_WIDE_INT frame_size;
+
   bool laid_out;
 };
 
@@ -537,11 +595,6 @@
 typedef struct GTY (()) machine_function
 {
   struct aarch64_frame frame;
-
-  /* The number of extra stack bytes taken up by register varargs.
-     This area is allocated by the callee at the very top of the frame.  */
-  HOST_WIDE_INT saved_varargs_size;
-
 } machine_function;
 #endif
 
@@ -565,11 +618,7 @@
 };
 
 
-extern enum arm_pcs arm_pcs_variant;
 
-#ifndef ARM_DEFAULT_PCS
-#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64
-#endif
 
 /* We can't use enum machine_mode inside a generator file because it
    hasn't been created yet; we shouldn't be using any code that
@@ -670,12 +719,14 @@
 /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends.  */
 #define AARCH64_CALL_RATIO 8
 
-/* When optimizing for size, give a better estimate of the length of a memcpy
-   call, but use the default otherwise.  But move_by_pieces_ninsns() counts
-   memory-to-memory moves, and we'll have to generate a load & store for each,
-   so halve the value to take that into account.  */
+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure.
+   move_by_pieces will continually copy the largest safe chunks.  So a
+   7-byte copy is a 4-byte + 2-byte + byte copy.  This proves inefficient
+   for both size and speed of copy, so we will instead use the "movmem"
+   standard name to implement the copy.  This logic does not apply when
+   targeting -mstrict-align, so keep a sensible default in that case.  */
 #define MOVE_RATIO(speed) \
-  (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)
+  (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2))
 
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
    of the length of a memset call, but use the default otherwise.  */
@@ -688,12 +739,6 @@
 #define SET_RATIO(speed) \
   ((speed) ? 15 : AARCH64_CALL_RATIO - 2)
 
-/* STORE_BY_PIECES_P can be used when copying a constant string, but
-   in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
-   For now we always fail this and let the move_by_pieces code copy
-   the string from read-only memory.  */
-#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
-
 /* Disable auto-increment in move_by_pieces et al.  Use of auto-increment is
    rarely a good idea in straight-line code since it adds an extra address
    dependency between each instruction.  Better to use incrementing offsets.  */
@@ -835,6 +880,11 @@
 
 #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD
 
+/* Choose appropriate mode for caller saves, so we do the minimum
+   required size of load/store.  */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+  aarch64_hard_regno_caller_save_mode ((REGNO), (NREGS), (MODE))
+
 /* Callee only saves lower 64-bits of a 128-bit register.  Tell the
    compiler the callee clobbers the top 64-bits when restoring the
    bottom 64-bits.  */
--- a/src/gcc/config/rs6000/rs6000.c
+++ b/src/gcc/config/rs6000/rs6000.c
@@ -27096,22 +27096,25 @@
     }
 }
 
-/* We are choosing insn from the ready queue.  Return nonzero if INSN can be chosen.  */
+/* We are choosing insn from the ready queue.  Return zero if INSN can be
+   chosen.  */
 static int
-rs6000_use_sched_lookahead_guard (rtx insn)
+rs6000_use_sched_lookahead_guard (rtx insn, int ready_index)
 {
+  if (ready_index == 0)
+    return 0;
+
   if (rs6000_cpu_attr != CPU_CELL)
-    return 1;
+    return 0;
 
-   if (insn == NULL_RTX || !INSN_P (insn))
-     abort ();
+  gcc_assert (insn != NULL_RTX && INSN_P (insn));
 
   if (!reload_completed
       || is_nonpipeline_insn (insn)
       || is_microcoded_insn (insn))
-    return 0;
+    return 1;
 
-  return 1;
+  return 0;
 }
 
 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
--- a/src/gcc/config/arc/arc.c
+++ b/src/gcc/config/arc/arc.c
@@ -398,6 +398,11 @@
 
 static bool arc_frame_pointer_required (void);
 
+static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
+						unsigned int,
+						enum by_pieces_operation op,
+						bool);
+
 /* Implements target hook vector_mode_supported_p.  */
 
 static bool
@@ -512,6 +517,10 @@
 #undef TARGET_DELEGITIMIZE_ADDRESS
 #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+  arc_use_by_pieces_infrastructure_p
+
 /* Usually, we will be able to scale anchor offsets.
    When this fails, we want LEGITIMIZE_ADDRESS to kick in.  */
 #undef TARGET_MIN_ANCHOR_OFFSET
@@ -9355,6 +9364,21 @@
   return false;
 }
 
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
+
+static bool
+arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+				    unsigned int align,
+				    enum by_pieces_operation op,
+				    bool speed_p)
+{
+  /* Let the movmem expander handle small block moves.  */
+  if (op == MOVE_BY_PIECES)
+    return false;
+
+  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-arc.h"
--- a/src/gcc/config/arc/arc.h
+++ b/src/gcc/config/arc/arc.h
@@ -1553,12 +1553,6 @@
    in one reasonably fast instruction.  */
 #define MOVE_MAX 4
 
-/* Let the movmem expander handle small block moves.  */
-#define MOVE_BY_PIECES_P(LEN, ALIGN)  0
-#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-   < (unsigned int) MOVE_RATIO (!optimize_size))
-
 /* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P .  */
 #define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3)
 
--- a/src/gcc/config/arm/aarch-cost-tables.h
+++ b/src/gcc/config/arm/aarch-cost-tables.h
@@ -39,6 +39,7 @@
     0,			/* bfi.  */
     0,			/* bfx.  */
     0,			/* clz.  */
+    0,			/* rev.  */
     COSTS_N_INSNS (1),	/* non_exec.  */
     false		/* non_exec_costs_exec.  */
   },
@@ -139,6 +140,7 @@
     COSTS_N_INSNS (1),	/* bfi.  */
     COSTS_N_INSNS (1),	/* bfx.  */
     0,			/* clz.  */
+    0,			/* rev.  */
     0,			/* non_exec.  */
     true		/* non_exec_costs_exec.  */
   },
@@ -239,6 +241,7 @@
     COSTS_N_INSNS (1), /* bfi.  */
     0,                 /* bfx.  */
     0,                 /* clz.  */
+    0,			/* rev.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -322,4 +325,105 @@
   }
 };
 
+const struct cpu_cost_table xgene1_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    COSTS_N_INSNS (1), /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    COSTS_N_INSNS (1), /* extend.  */
+    0,                 /* extend_arithm.  */
+    COSTS_N_INSNS (1), /* bfi.  */
+    COSTS_N_INSNS (1), /* bfx.  */
+    0,                 /* clz.  */
+    COSTS_N_INSNS (1), /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (4),       /* simple.  */
+      COSTS_N_INSNS (4),       /* flag_setting.  */
+      COSTS_N_INSNS (4),       /* extend.  */
+      COSTS_N_INSNS (4),       /* add.  */
+      COSTS_N_INSNS (4),       /* extend_add.  */
+      COSTS_N_INSNS (20)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (5),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (5),       /* extend.  */
+      COSTS_N_INSNS (5),       /* add.  */
+      COSTS_N_INSNS (5),       /* extend_add.  */
+      COSTS_N_INSNS (21)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (5),         /* load.  */
+    COSTS_N_INSNS (6),         /* load_sign_extend.  */
+    COSTS_N_INSNS (5),         /* ldrd.  */
+    COSTS_N_INSNS (5),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st.  */
+    1,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (10),        /* loadf.  */
+    COSTS_N_INSNS (10),        /* loadd.  */
+    COSTS_N_INSNS (5),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st.  */
+    1,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    0,                         /* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (23),      /* div.  */
+      COSTS_N_INSNS (5),       /* mult.  */
+      COSTS_N_INSNS (5),       /* mult_addsub. */
+      COSTS_N_INSNS (5),       /* fma.  */
+      COSTS_N_INSNS (5),       /* addsub.  */
+      COSTS_N_INSNS (2),       /* fpconst. */
+      COSTS_N_INSNS (3),       /* neg.  */
+      COSTS_N_INSNS (2),       /* compare.  */
+      COSTS_N_INSNS (6),       /* widen.  */
+      COSTS_N_INSNS (6),       /* narrow.  */
+      COSTS_N_INSNS (4),       /* toint.  */
+      COSTS_N_INSNS (4),       /* fromint.  */
+      COSTS_N_INSNS (4)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (29),      /* div.  */
+      COSTS_N_INSNS (5),       /* mult.  */
+      COSTS_N_INSNS (5),       /* mult_addsub.  */
+      COSTS_N_INSNS (5),       /* fma.  */
+      COSTS_N_INSNS (5),       /* addsub.  */
+      COSTS_N_INSNS (3),       /* fpconst.  */
+      COSTS_N_INSNS (3),       /* neg.  */
+      COSTS_N_INSNS (2),       /* compare.  */
+      COSTS_N_INSNS (6),       /* widen.  */
+      COSTS_N_INSNS (6),       /* narrow.  */
+      COSTS_N_INSNS (4),       /* toint.  */
+      COSTS_N_INSNS (4),       /* fromint.  */
+      COSTS_N_INSNS (4)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (2)  /* alu.  */
+  }
+};
+
 #endif /* GCC_AARCH_COST_TABLES_H */
--- a/src/gcc/config/arm/cortex-a15.md
+++ b/src/gcc/config/arm/cortex-a15.md
@@ -64,7 +64,7 @@
        (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                         alu_reg,alus_reg,logic_reg,logics_reg,\
                         adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                        adr,bfm,rev,\
+                        adr,bfm,clz,rbit,rev,\
                         shift_imm,shift_reg,\
                         mov_imm,mov_reg,\
                         mvn_imm,mvn_reg,\
@@ -72,11 +72,14 @@
   "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)")
 
 ;; ALU ops with immediate shift
+;; crc is also included here so that appropriate scheduling of CRC32 ARMv8-A
+;; instructions can be performed when tuning for the Cortex-A57 since that
+;; core reuses the Cortex-A15 pipeline description for the moment.
 (define_insn_reservation "cortex_a15_alu_shift" 3
   (and (eq_attr "tune" "cortexa15")
        (eq_attr "type" "extend,\
                         alu_shift_imm,alus_shift_imm,\
-                        logic_shift_imm,logics_shift_imm,\
+                        crc,logic_shift_imm,logics_shift_imm,\
                         mov_shift,mvn_shift"))
   "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\
 	       |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)")
--- a/src/gcc/config/arm/arm-tables.opt
+++ b/src/gcc/config/arm/arm-tables.opt
@@ -241,6 +241,15 @@
 Enum(processor_type) String(cortex-m0plus) Value(cortexm0plus)
 
 EnumValue
+Enum(processor_type) String(cortex-m1.small-multiply) Value(cortexm1smallmultiply)
+
+EnumValue
+Enum(processor_type) String(cortex-m0.small-multiply) Value(cortexm0smallmultiply)
+
+EnumValue
+Enum(processor_type) String(cortex-m0plus.small-multiply) Value(cortexm0plussmallmultiply)
+
+EnumValue
 Enum(processor_type) String(generic-armv7-a) Value(genericv7a)
 
 EnumValue
@@ -262,6 +271,9 @@
 Enum(processor_type) String(cortex-a15) Value(cortexa15)
 
 EnumValue
+Enum(processor_type) String(cortex-a17) Value(cortexa17)
+
+EnumValue
 Enum(processor_type) String(cortex-r4) Value(cortexr4)
 
 EnumValue
@@ -274,6 +286,9 @@
 Enum(processor_type) String(cortex-r7) Value(cortexr7)
 
 EnumValue
+Enum(processor_type) String(cortex-m7) Value(cortexm7)
+
+EnumValue
 Enum(processor_type) String(cortex-m4) Value(cortexm4)
 
 EnumValue
@@ -286,6 +301,9 @@
 Enum(processor_type) String(cortex-a15.cortex-a7) Value(cortexa15cortexa7)
 
 EnumValue
+Enum(processor_type) String(cortex-a17.cortex-a7) Value(cortexa17cortexa7)
+
+EnumValue
 Enum(processor_type) String(cortex-a53) Value(cortexa53)
 
 EnumValue
@@ -292,8 +310,17 @@
 Enum(processor_type) String(cortex-a57) Value(cortexa57)
 
 EnumValue
+Enum(processor_type) String(cortex-a72) Value(cortexa72)
+
+EnumValue
+Enum(processor_type) String(xgene1) Value(xgene1)
+
+EnumValue
 Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53)
 
+EnumValue
+Enum(processor_type) String(cortex-a72.cortex-a53) Value(cortexa72cortexa53)
+
 Enum
 Name(arm_arch) Type(int)
 Known ARM architectures (for use with the -march= option):
@@ -423,17 +450,23 @@
 Enum(arm_fpu) String(fpv4-sp-d16) Value(11)
 
 EnumValue
-Enum(arm_fpu) String(neon-vfpv4) Value(12)
+Enum(arm_fpu) String(fpv5-sp-d16) Value(12)
 
 EnumValue
-Enum(arm_fpu) String(fp-armv8) Value(13)
+Enum(arm_fpu) String(fpv5-d16) Value(13)
 
 EnumValue
-Enum(arm_fpu) String(neon-fp-armv8) Value(14)
+Enum(arm_fpu) String(neon-vfpv4) Value(14)
 
 EnumValue
-Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(15)
+Enum(arm_fpu) String(fp-armv8) Value(15)
 
 EnumValue
-Enum(arm_fpu) String(vfp3) Value(16)
+Enum(arm_fpu) String(neon-fp-armv8) Value(16)
 
+EnumValue
+Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(17)
+
+EnumValue
+Enum(arm_fpu) String(vfp3) Value(18)
+
--- a/src/gcc/config/arm/thumb2.md
+++ b/src/gcc/config/arm/thumb2.md
@@ -267,6 +267,17 @@
    (set_attr "type" "multiple")]
 )
 
+;; Pop a single register as its size is preferred over a post-incremental load
+(define_insn "*thumb2_pop_single"
+  [(set (match_operand:SI 0 "low_register_operand" "=r")
+        (mem:SI (post_inc:SI (reg:SI SP_REGNUM))))]
+  "TARGET_THUMB2 && (reload_in_progress || reload_completed)"
+  "pop\t{%0}"
+  [(set_attr "type" "load1")
+   (set_attr "length" "2")
+   (set_attr "predicable" "yes")]
+)
+
 ;; We have two alternatives here for memory loads (and similarly for stores)
 ;; to reflect the fact that the permissible constant pool ranges differ
 ;; between ldr instructions taking low regs and ldr instructions taking high
@@ -329,7 +340,7 @@
    movw%?\\t%0, %L1\\t%@ movhi
    str%(h%)\\t%1, %0\\t%@ movhi
    ldr%(h%)\\t%0, %1\\t%@ movhi"
-  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1")
+  [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1")
    (set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no,yes,no,no,no")
    (set_attr "length" "2,4,2,4,4,4")
@@ -1370,6 +1381,103 @@
    (set_attr "type" "alu_reg")]
 )
 
+; Constants for op 2 will never be given to these patterns.
+(define_insn_and_split "*iordi_notdi_di"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+	(ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r"))
+		(match_operand:DI 2 "s_register_operand" "r,0")))]
+  "TARGET_THUMB2"
+  "#"
+  "TARGET_THUMB2 && reload_completed"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2)))
+   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*iordi_notzesidi_di"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+	(ior:DI (not:DI (zero_extend:DI
+			 (match_operand:SI 2 "s_register_operand" "r,r")))
+		(match_operand:DI 1 "s_register_operand" "0,?r")))]
+  "TARGET_THUMB2"
+  "#"
+  ; (not (zero_extend...)) means operand0 will always be 0xffffffff
+  "TARGET_THUMB2 && reload_completed"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (const_int -1))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+  }"
+  [(set_attr "length" "4,8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*iordi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+	(ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
+		(zero_extend:DI
+		 (match_operand:SI 1 "s_register_operand" "r,r"))))]
+  "TARGET_THUMB2"
+  "#"
+  "TARGET_THUMB2 && reload_completed"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (not:SI (match_dup 4)))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[4] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*iordi_notsesidi_di"
+  [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+	(ior:DI (not:DI (sign_extend:DI
+			 (match_operand:SI 2 "s_register_operand" "r,r")))
+		(match_operand:DI 1 "s_register_operand" "0,r")))]
+  "TARGET_THUMB2"
+  "#"
+  "TARGET_THUMB2 && reload_completed"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (ior:SI (not:SI
+				(ashiftrt:SI (match_dup 2) (const_int 31)))
+			       (match_dup 4)))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
 (define_insn "*orsi_notsi_si"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
--- a/src/gcc/config/arm/arm.c
+++ b/src/gcc/config/arm/arm.c
@@ -50,6 +50,7 @@
 #include "except.h"
 #include "tm_p.h"
 #include "target.h"
+#include "sched-int.h"
 #include "target-def.h"
 #include "debug.h"
 #include "langhooks.h"
@@ -59,6 +60,8 @@
 #include "params.h"
 #include "opts.h"
 #include "dumpfile.h"
+#include "gimple-expr.h"
+#include "sched-int.h"
 
 /* Forward definitions of types.  */
 typedef struct minipool_node    Mnode;
@@ -93,6 +96,7 @@
 static bool thumb_force_lr_save (void);
 static unsigned arm_size_return_regs (void);
 static bool arm_assemble_integer (rtx, unsigned int, int);
+static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
 static void arm_print_operand (FILE *, rtx, int);
 static void arm_print_operand_address (FILE *, rtx);
 static bool arm_print_operand_punct_valid_p (unsigned char code);
@@ -234,8 +238,11 @@
 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
 static void arm_option_override (void);
 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
+static bool arm_macro_fusion_p (void);
 static bool arm_cannot_copy_insn_p (rtx);
 static int arm_issue_rate (void);
+static int arm_first_cycle_multipass_dfa_lookahead (void);
+static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx, int);
 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
 static bool arm_output_addr_const_extra (FILE *, rtx);
 static bool arm_allocate_stack_slots_for_args (void);
@@ -274,6 +281,8 @@
 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 					     const unsigned char *sel);
 
+static bool aarch_macro_fusion_pair_p (rtx, rtx);
+
 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 					   tree vectype,
 					   int misalign ATTRIBUTE_UNUSED);
@@ -379,6 +388,12 @@
 #undef  TARGET_COMP_TYPE_ATTRIBUTES
 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
 
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
 
@@ -581,9 +596,20 @@
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
 
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+  arm_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
+  arm_first_cycle_multipass_dfa_lookahead_guard
+
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE arm_mangle_type
 
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
+
 #undef TARGET_BUILD_BUILTIN_VA_LIST
 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
 #undef TARGET_EXPAND_BUILTIN_VA_START
@@ -740,6 +766,8 @@
 #define FL_ARCH8      (1 << 24)       /* Architecture 8.  */
 #define FL_CRC32      (1 << 25)	      /* ARMv8 CRC32 instructions.  */
 
+#define FL_SMALLMUL   (1 << 26)       /* Small multiply supported.  */
+
 #define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */
 #define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
 
@@ -907,6 +935,9 @@
 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
 int arm_arch_crc = 0;
 
+/* Nonzero if the core has a very small, high-latency, multiply unit.  */
+int arm_m_profile_small_mul = 0;
+
 /* The condition codes of the ARM, and the inverse function.  */
 static const char * const arm_condition_codes[] =
 {
@@ -985,6 +1016,7 @@
     COSTS_N_INSNS (1),	/* bfi.  */
     COSTS_N_INSNS (1),	/* bfx.  */
     0,			/* clz.  */
+    0,			/* rev.  */
     0,			/* non_exec.  */
     true		/* non_exec_costs_exec.  */
   },
@@ -1068,7 +1100,210 @@
   }
 };
 
+const struct cpu_cost_table cortexa8_extra_costs =
+{
+  /* ALU */
+  {
+    0,			/* arith.  */
+    0,			/* logical.  */
+    COSTS_N_INSNS (1),	/* shift.  */
+    0,			/* shift_reg.  */
+    COSTS_N_INSNS (1),	/* arith_shift.  */
+    0,			/* arith_shift_reg.  */
+    COSTS_N_INSNS (1),	/* log_shift.  */
+    0,			/* log_shift_reg.  */
+    0,			/* extend.  */
+    0,			/* extend_arith.  */
+    0,			/* bfi.  */
+    0,			/* bfx.  */
+    0,			/* clz.  */
+    0,			/* rev.  */
+    0,			/* non_exec.  */
+    true		/* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (1),	/* simple.  */
+      COSTS_N_INSNS (1),	/* flag_setting.  */
+      COSTS_N_INSNS (1),	/* extend.  */
+      COSTS_N_INSNS (1),	/* add.  */
+      COSTS_N_INSNS (1),	/* extend_add.  */
+      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
+    },
+    /* MULT DImode */
+    {
+      0,			/* simple (N/A).  */
+      0,			/* flag_setting (N/A).  */
+      COSTS_N_INSNS (2),	/* extend.  */
+      0,			/* add (N/A).  */
+      COSTS_N_INSNS (2),	/* extend_add.  */
+      0				/* idiv (N/A).  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (1),	/* load.  */
+    COSTS_N_INSNS (1),	/* load_sign_extend.  */
+    COSTS_N_INSNS (1),	/* ldrd.  */
+    COSTS_N_INSNS (1),	/* ldm_1st.  */
+    1,			/* ldm_regs_per_insn_1st.  */
+    2,			/* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (1),	/* loadf.  */
+    COSTS_N_INSNS (1),	/* loadd.  */
+    COSTS_N_INSNS (1),  /* load_unaligned.  */
+    COSTS_N_INSNS (1),	/* store.  */
+    COSTS_N_INSNS (1),	/* strd.  */
+    COSTS_N_INSNS (1),	/* stm_1st.  */
+    1,			/* stm_regs_per_insn_1st.  */
+    2,			/* stm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (1),	/* storef.  */
+    COSTS_N_INSNS (1),	/* stored.  */
+    COSTS_N_INSNS (1)	/* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (36),	/* div.  */
+      COSTS_N_INSNS (11),	/* mult.  */
+      COSTS_N_INSNS (20),	/* mult_addsub. */
+      COSTS_N_INSNS (30),	/* fma.  */
+      COSTS_N_INSNS (9),	/* addsub.  */
+      COSTS_N_INSNS (3),	/* fpconst.  */
+      COSTS_N_INSNS (3),	/* neg.  */
+      COSTS_N_INSNS (6),	/* compare.  */
+      COSTS_N_INSNS (4),	/* widen.  */
+      COSTS_N_INSNS (4),	/* narrow.  */
+      COSTS_N_INSNS (8),	/* toint.  */
+      COSTS_N_INSNS (8),	/* fromint.  */
+      COSTS_N_INSNS (8)		/* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (64),	/* div.  */
+      COSTS_N_INSNS (16),	/* mult.  */
+      COSTS_N_INSNS (25),	/* mult_addsub.  */
+      COSTS_N_INSNS (30),	/* fma.  */
+      COSTS_N_INSNS (9),	/* addsub.  */
+      COSTS_N_INSNS (3),	/* fpconst.  */
+      COSTS_N_INSNS (3),	/* neg.  */
+      COSTS_N_INSNS (6),	/* compare.  */
+      COSTS_N_INSNS (6),	/* widen.  */
+      COSTS_N_INSNS (6),	/* narrow.  */
+      COSTS_N_INSNS (8),	/* toint.  */
+      COSTS_N_INSNS (8),	/* fromint.  */
+      COSTS_N_INSNS (8)		/* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)	/* alu.  */
+  }
+};
 
+const struct cpu_cost_table cortexa5_extra_costs =
+{
+  /* ALU */
+  {
+    0,			/* arith.  */
+    0,			/* logical.  */
+    COSTS_N_INSNS (1),	/* shift.  */
+    COSTS_N_INSNS (1),	/* shift_reg.  */
+    COSTS_N_INSNS (1),	/* arith_shift.  */
+    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
+    COSTS_N_INSNS (1),	/* log_shift.  */
+    COSTS_N_INSNS (1),	/* log_shift_reg.  */
+    COSTS_N_INSNS (1),	/* extend.  */
+    COSTS_N_INSNS (1),	/* extend_arith.  */
+    COSTS_N_INSNS (1),	/* bfi.  */
+    COSTS_N_INSNS (1),	/* bfx.  */
+    COSTS_N_INSNS (1),	/* clz.  */
+    COSTS_N_INSNS (1),	/* rev.  */
+    0,			/* non_exec.  */
+    true		/* non_exec_costs_exec.  */
+  },
+
+  {
+    /* MULT SImode */
+    {
+      0,			/* simple.  */
+      COSTS_N_INSNS (1),	/* flag_setting.  */
+      COSTS_N_INSNS (1),	/* extend.  */
+      COSTS_N_INSNS (1),	/* add.  */
+      COSTS_N_INSNS (1),	/* extend_add.  */
+      COSTS_N_INSNS (7)		/* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      0,			/* simple (N/A).  */
+      0,			/* flag_setting (N/A).  */
+      COSTS_N_INSNS (1),	/* extend.  */
+      0,			/* add.  */
+      COSTS_N_INSNS (2),	/* extend_add.  */
+      0				/* idiv (N/A).  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (1),	/* load.  */
+    COSTS_N_INSNS (1),	/* load_sign_extend.  */
+    COSTS_N_INSNS (6),	/* ldrd.  */
+    COSTS_N_INSNS (1),	/* ldm_1st.  */
+    1,			/* ldm_regs_per_insn_1st.  */
+    2,			/* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (2),	/* loadf.  */
+    COSTS_N_INSNS (4),	/* loadd.  */
+    COSTS_N_INSNS (1),	/* load_unaligned.  */
+    COSTS_N_INSNS (1),	/* store.  */
+    COSTS_N_INSNS (3),	/* strd.  */
+    COSTS_N_INSNS (1),	/* stm_1st.  */
+    1,			/* stm_regs_per_insn_1st.  */
+    2,			/* stm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (2),	/* storef.  */
+    COSTS_N_INSNS (2),	/* stored.  */
+    COSTS_N_INSNS (1)	/* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (15),	/* div.  */
+      COSTS_N_INSNS (3),	/* mult.  */
+      COSTS_N_INSNS (7),	/* mult_addsub. */
+      COSTS_N_INSNS (7),	/* fma.  */
+      COSTS_N_INSNS (3),	/* addsub.  */
+      COSTS_N_INSNS (3),	/* fpconst.  */
+      COSTS_N_INSNS (3),	/* neg.  */
+      COSTS_N_INSNS (3),	/* compare.  */
+      COSTS_N_INSNS (3),	/* widen.  */
+      COSTS_N_INSNS (3),	/* narrow.  */
+      COSTS_N_INSNS (3),	/* toint.  */
+      COSTS_N_INSNS (3),	/* fromint.  */
+      COSTS_N_INSNS (3)		/* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (30),	/* div.  */
+      COSTS_N_INSNS (6),	/* mult.  */
+      COSTS_N_INSNS (10),	/* mult_addsub.  */
+      COSTS_N_INSNS (7),	/* fma.  */
+      COSTS_N_INSNS (3),	/* addsub.  */
+      COSTS_N_INSNS (3),	/* fpconst.  */
+      COSTS_N_INSNS (3),	/* neg.  */
+      COSTS_N_INSNS (3),	/* compare.  */
+      COSTS_N_INSNS (3),	/* widen.  */
+      COSTS_N_INSNS (3),	/* narrow.  */
+      COSTS_N_INSNS (3),	/* toint.  */
+      COSTS_N_INSNS (3),	/* fromint.  */
+      COSTS_N_INSNS (3)		/* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)	/* alu.  */
+  }
+};
+
+
 const struct cpu_cost_table cortexa7_extra_costs =
 {
   /* ALU */
@@ -1086,6 +1321,7 @@
     COSTS_N_INSNS (1),	/* bfi.  */
     COSTS_N_INSNS (1),	/* bfx.  */
     COSTS_N_INSNS (1),	/* clz.  */
+    COSTS_N_INSNS (1),	/* rev.  */
     0,			/* non_exec.  */
     true		/* non_exec_costs_exec.  */
   },
@@ -1187,6 +1423,7 @@
     0,			/* bfi.  */
     COSTS_N_INSNS (1),	/* bfx.  */
     COSTS_N_INSNS (1),	/* clz.  */
+    COSTS_N_INSNS (1),	/* rev.  */
     0,			/* non_exec.  */
     true		/* non_exec_costs_exec.  */
   },
@@ -1287,6 +1524,7 @@
     COSTS_N_INSNS (1),	/* bfi.  */
     0,			/* bfx.  */
     0,			/* clz.  */
+    0,			/* rev.  */
     0,			/* non_exec.  */
     true		/* non_exec_costs_exec.  */
   },
@@ -1387,6 +1625,7 @@
     0,			/* bfi.  */
     0,			/* bfx.  */
     0,			/* clz.  */
+    0,			/* rev.  */
     COSTS_N_INSNS (1),	/* non_exec.  */
     false		/* non_exec_costs_exec.  */
   },
@@ -1470,6 +1709,9 @@
   }
 };
 
+#define ARM_FUSE_NOTHING	(0)
+#define ARM_FUSE_MOVW_MOVT	(1 << 0)
+
 const struct tune_params arm_slowmul_tune =
 {
   arm_slowmul_rtx_costs,
@@ -1483,7 +1725,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -1499,7 +1744,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -1518,7 +1766,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_xscale_tune =
@@ -1534,7 +1785,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_9e_tune =
@@ -1550,7 +1804,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -1566,7 +1823,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1583,9 +1843,31 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
+const struct tune_params arm_cortex_a8_tune =
+{
+  arm_9e_rtx_costs,
+  &cortexa8_extra_costs,
+  NULL,						/* Sched adj cost.  */
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,					/* Prefer constant pool.  */
+  arm_default_branch_cost,
+  false,					/* Prefer LDRD/STRD.  */
+  {true, true},					/* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
+};
+
 const struct tune_params arm_cortex_a7_tune =
 {
   arm_9e_rtx_costs,
@@ -1599,7 +1881,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,			/* Vectorizer costs.  */
-  false						/* Prefer Neon for 64-bits bitops.  */
+  false,					/* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_cortex_a15_tune =
@@ -1615,7 +1900,10 @@
   true,						/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  true, true,                                   /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_FULL,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_cortex_a53_tune =
@@ -1631,7 +1919,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,			/* Vectorizer costs.  */
-  false						/* Prefer Neon for 64-bits bitops.  */
+  false,					/* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_MOVW_MOVT				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_cortex_a57_tune =
@@ -1647,9 +1938,31 @@
   true,                                       /* Prefer LDRD/STRD.  */
   {true, true},                                /* Prefer non short circuit.  */
   &arm_default_vec_cost,                       /* Vectorizer costs.  */
-  false                                        /* Prefer Neon for 64-bits bitops.  */
+  false,                                       /* Prefer Neon for 64-bits bitops.  */
+  true, true,                                  /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_FULL,			/* Sched L2 autopref.  */
+  ARM_FUSE_MOVW_MOVT				/* Fuseable pairs of instructions.  */
 };
 
+const struct tune_params arm_xgene1_tune =
+{
+  arm_9e_rtx_costs,
+  &xgene1_extra_costs,
+  NULL,                                        /* Scheduler cost adjustment.  */
+  1,                                           /* Constant limit.  */
+  2,                                           /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,                                       /* Prefer constant pool.  */
+  arm_default_branch_cost,
+  true,                                        /* Prefer LDRD/STRD.  */
+  {true, true},                                /* Prefer non short circuit.  */
+  &arm_default_vec_cost,                       /* Vectorizer costs.  */
+  false,                                       /* Prefer Neon for 64-bits bitops.  */
+  true, true,                                  /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
+};
+
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
    less appealing.  Set max_insns_skipped to a low value.  */
 
@@ -1656,7 +1969,7 @@
 const struct tune_params arm_cortex_a5_tune =
 {
   arm_9e_rtx_costs,
-  NULL,
+  &cortexa5_extra_costs,
   NULL,						/* Sched adj cost.  */
   1,						/* Constant limit.  */
   1,						/* Max cond insns.  */
@@ -1666,7 +1979,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {false, false},				/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -1682,7 +1998,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_cortex_a12_tune =
@@ -1689,16 +2008,19 @@
 {
   arm_9e_rtx_costs,
   &cortexa12_extra_costs,
-  NULL,
+  NULL,						/* Sched adj cost. */
   1,						/* Constant limit.  */
-  5,						/* Max cond insns.  */
-  ARM_PREFETCH_BENEFICIAL(4,32,32),
+  2,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
   true,						/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  true, true,                                 	/* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_MOVW_MOVT				/* Fuseable pairs of instructions.  */
 };
 
 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
@@ -1721,9 +2043,33 @@
   false,					/* Prefer LDRD/STRD.  */
   {false, false},				/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
+/* Cortex-M7 tuning.  */
+
+const struct tune_params arm_cortex_m7_tune =
+{
+  arm_9e_rtx_costs,
+  &v7m_extra_costs,
+  NULL,						/* Sched adj cost.  */
+  0,						/* Constant limit.  */
+  0,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_cortex_m_branch_cost,
+  false,					/* Prefer LDRD/STRD.  */
+  {true, true},					/* Prefer non short circuit.  */
+  &arm_default_vec_cost,                        /* Vectorizer costs.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
+};
+
 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
    arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
 const struct tune_params arm_v6m_tune =
@@ -1739,7 +2085,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {false, false},				/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -1755,7 +2104,10 @@
   false,					/* Prefer LDRD/STRD.  */
   {true, true},					/* Prefer non short circuit.  */
   &arm_default_vec_cost,                        /* Vectorizer costs.  */
-  false                                         /* Prefer Neon for 64-bits bitops.  */
+  false,                                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                                 /* Prefer 32-bit encodings.  */
+  ARM_SCHED_AUTOPREF_OFF,			/* Sched L2 autopref.  */
+  ARM_FUSE_NOTHING				/* Fuseable pairs of instructions.  */
 };
 
 
@@ -2503,6 +2855,7 @@
   arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
   arm_arch_crc = (insn_flags & FL_CRC32) != 0;
+  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
   if (arm_restrict_it == 2)
     arm_restrict_it = arm_arch8 && TARGET_THUMB2;
 
@@ -2806,10 +3159,26 @@
      prefer_neon_for_64bits = true;
 
   /* Use the alternative scheduling-pressure algorithm by default.  */
-  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
+  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
                          global_options.x_param_values,
                          global_options_set.x_param_values);
 
+  /* Look through ready list and all of queue for instructions
+     relevant for L2 auto-prefetcher.  */
+  int param_sched_autopref_queue_depth;
+  if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
+    param_sched_autopref_queue_depth = -1;
+  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
+    param_sched_autopref_queue_depth = 0;
+  else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
+    param_sched_autopref_queue_depth = max_insn_queue_index + 1;
+  else
+    gcc_unreachable ();
+  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
+			 param_sched_autopref_queue_depth,
+                         global_options.x_param_values,
+                         global_options_set.x_param_values);
+
   /* Disable shrink-wrap when optimizing function for size, since it tends to
      generate additional returns.  */
   if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
@@ -6079,11 +6448,6 @@
   if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
     return false;
 
-  /* Cannot tail-call to long calls, since these are out of range of
-     a branch instruction.  */
-  if (decl && arm_is_long_call_p (decl))
-    return false;
-
   /* If we are interworking and the function is not declared static
      then we can't tail-call it unless we know that it exists in this
      compilation unit (since it might be a Thumb routine).  */
@@ -8637,7 +9001,13 @@
           /* Thumb1 mul instruction can't operate on const. We must Load it
              into a register first.  */
           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
-          return COSTS_N_INSNS (1) + const_size;
+	  /* For the targets which have a very small and high-latency multiply
+	     unit, we prefer to synthesize the mult with up to 5 instructions,
+	     giving a good balance between size and performance.  */
+	  if (arm_arch6m && arm_m_profile_small_mul)
+	    return COSTS_N_INSNS (5);
+	  else
+	    return COSTS_N_INSNS (1) + const_size;
         }
       return COSTS_N_INSNS (1);
 
@@ -9337,6 +9707,47 @@
       *cost = LIBCALL_COST (2);
       return false;
 
+    case BSWAP:
+      if (arm_arch6)
+        {
+          if (mode == SImode)
+            {
+              *cost = COSTS_N_INSNS (1);
+              if (speed_p)
+                *cost += extra_cost->alu.rev;
+
+              return false;
+            }
+        }
+      else
+        {
+        /* No rev instruction available.  Look at arm_legacy_rev
+           and thumb_legacy_rev for the form of RTL used then.  */
+          if (TARGET_THUMB)
+            {
+              *cost = COSTS_N_INSNS (10);
+
+              if (speed_p)
+                {
+                  *cost += 6 * extra_cost->alu.shift;
+                  *cost += 3 * extra_cost->alu.logical;
+                }
+            }
+          else
+            {
+              *cost = COSTS_N_INSNS (5);
+
+              if (speed_p)
+                {
+                  *cost += 2 * extra_cost->alu.shift;
+                  *cost += extra_cost->alu.arith_shift;
+                  *cost += 2 * extra_cost->alu.logical;
+                }
+            }
+          return true;
+        }
+      return false;
+
     case MINUS:
       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
@@ -9719,8 +10130,17 @@
       /* Vector mode?  */
       *cost = LIBCALL_COST (2);
       return false;
+    case IOR:
+      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
+        {
+          *cost = COSTS_N_INSNS (1);
+          if (speed_p)
+            *cost += extra_cost->alu.rev;
 
-    case AND: case XOR: case IOR:
+          return true;
+        }
+    /* Fall through.  */
+    case AND: case XOR:
       if (mode == SImode)
 	{
 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
@@ -10626,6 +11046,36 @@
       *cost = LIBCALL_COST (1);
       return false;
 
+    case FMA:
+      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
+        {
+          rtx op0 = XEXP (x, 0);
+          rtx op1 = XEXP (x, 1);
+          rtx op2 = XEXP (x, 2);
+
+          *cost = COSTS_N_INSNS (1);
+
+          /* vfms or vfnma.  */
+          if (GET_CODE (op0) == NEG)
+            op0 = XEXP (op0, 0);
+
+          /* vfnms or vfnma.  */
+          if (GET_CODE (op2) == NEG)
+            op2 = XEXP (op2, 0);
+
+          *cost += rtx_cost (op0, FMA, 0, speed_p);
+          *cost += rtx_cost (op1, FMA, 1, speed_p);
+          *cost += rtx_cost (op2, FMA, 2, speed_p);
+
+          if (speed_p)
+            *cost += extra_cost->fp[mode ==DFmode].fma;
+
+          return true;
+        }
+
+      *cost = LIBCALL_COST (3);
+      return false;
+
     case FIX:
     case UNSIGNED_FIX:
       if (TARGET_HARD_FLOAT)
@@ -10676,10 +11126,16 @@
       return true;
 
     case ASM_OPERANDS:
-      /* Just a guess.  Cost one insn per input.  */
-      *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
-      return true;
+      {
+      /* Just a guess.  Guess number of instructions in the asm
+         plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
+         though (see PR60663).  */
+        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
+        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
 
+        *cost = COSTS_N_INSNS (asm_length + num_operands);
+        return true;
+      }
     default:
       if (mode != VOIDmode)
 	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
@@ -10976,7 +11432,11 @@
       switch (code)
 	{
 	case MULT:
-	  *total = COSTS_N_INSNS (3);
+	  /* Small multiply: 32 cycles for an integer multiply inst.  */
+	  if (arm_arch6m && arm_m_profile_small_mul)
+	    *total = COSTS_N_INSNS (32);
+	  else
+	    *total = COSTS_N_INSNS (3);
 	  return true;
 
 	default:
@@ -12573,7 +13033,11 @@
       || (type == 0 && GET_CODE (ind) == PRE_DEC))
     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
 
-  /* FIXME: vld1 allows register post-modify.  */
+  /* Allow post-increment by register for VLDn */
+  if (type == 2 && GET_CODE (ind) == POST_MODIFY
+      && GET_CODE (XEXP (ind, 1)) == PLUS
+      && REG_P (XEXP (XEXP (ind, 1), 1)))
+     return true;
 
   /* Match:
      (plus (reg)
@@ -16794,9 +17258,20 @@
   compute_bb_for_insn ();
   df_analyze ();
 
+  enum Convert_Action {SKIP, CONV, SWAP_CONV};
+
   FOR_EACH_BB_FN (bb, cfun)
     {
+      if (current_tune->disparage_flag_setting_t16_encodings
+	  && optimize_bb_for_speed_p (bb))
+	continue;
+
       rtx insn;
+      Convert_Action action = SKIP;
+      Convert_Action action_for_partial_flag_setting
+	= (current_tune->disparage_partial_flag_setting_t16_encodings
+	   && optimize_bb_for_speed_p (bb))
+	  ? SKIP : CONV;
 
       COPY_REG_SET (&live, DF_LR_OUT (bb));
       df_simulate_initialize_backwards (bb, &live);
@@ -16806,7 +17281,7 @@
 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
 	      && GET_CODE (PATTERN (insn)) == SET)
 	    {
-	      enum {SKIP, CONV, SWAP_CONV} action = SKIP;
+	      action = SKIP;
 	      rtx pat = PATTERN (insn);
 	      rtx dst = XEXP (pat, 0);
 	      rtx src = XEXP (pat, 1);
@@ -16887,10 +17362,11 @@
 		      /* ANDS <Rdn>,<Rm>  */
 		      if (rtx_equal_p (dst, op0)
 			  && low_register_operand (op1, SImode))
-			action = CONV;
+			action = action_for_partial_flag_setting;
 		      else if (rtx_equal_p (dst, op1)
 			       && low_register_operand (op0, SImode))
-			action = SWAP_CONV;
+			action = action_for_partial_flag_setting == SKIP
+				 ? SKIP : SWAP_CONV;
 		      break;
 
 		    case ASHIFTRT:
@@ -16901,7 +17377,7 @@
 		      /* LSLS <Rdn>,<Rm> */
 		      if (rtx_equal_p (dst, op0)
 			  && low_register_operand (op1, SImode))
-			action = CONV;
+			action = action_for_partial_flag_setting;
 		      /* ASRS <Rd>,<Rm>,#<imm5> */
 		      /* LSRS <Rd>,<Rm>,#<imm5> */
 		      /* LSLS <Rd>,<Rm>,#<imm5> */
@@ -16908,7 +17384,7 @@
 		      else if (low_register_operand (op0, SImode)
 			       && CONST_INT_P (op1)
 			       && IN_RANGE (INTVAL (op1), 0, 31))
-			action = CONV;
+			action = action_for_partial_flag_setting;
 		      break;
 
 		    case ROTATERT:
@@ -16915,12 +17391,16 @@
 		      /* RORS <Rdn>,<Rm>  */
 		      if (rtx_equal_p (dst, op0)
 			  && low_register_operand (op1, SImode))
-			action = CONV;
+			action = action_for_partial_flag_setting;
 		      break;
 
 		    case NOT:
+		      /* MVNS <Rd>,<Rm>  */
+		      if (low_register_operand (op0, SImode))
+			action = action_for_partial_flag_setting;
+		      break;
+
 		    case NEG:
-		      /* MVNS <Rd>,<Rm>  */
 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
 		      if (low_register_operand (op0, SImode))
 			action = CONV;
@@ -16930,7 +17410,7 @@
 		      /* MOVS <Rd>,#<imm8>  */
 		      if (CONST_INT_P (src)
 			  && IN_RANGE (INTVAL (src), 0, 255))
-			action = CONV;
+			action = action_for_partial_flag_setting;
 		      break;
 
 		    case REG:
@@ -17151,24 +17631,7 @@
 
 /* Routines to output assembly language.  */
 
-/* If the rtx is the correct value then return the string of the number.
-   In this way we can ensure that valid double constants are generated even
-   when cross compiling.  */
-const char *
-fp_immediate_constant (rtx x)
-{
-  REAL_VALUE_TYPE r;
-
-  if (!fp_consts_inited)
-    init_fp_table ();
-
-  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
-
-  gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
-  return "0";
-}
-
-/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
+/* Return string representation of passed in real value.  */
 static const char *
 fp_const_from_val (REAL_VALUE_TYPE *r)
 {
@@ -17259,14 +17722,22 @@
 /* Output the assembly for a store multiple.  */
 
 const char *
-vfp_output_fstmd (rtx * operands)
+vfp_output_vstmd (rtx * operands)
 {
   char pattern[100];
   int p;
   int base;
   int i;
+  rtx addr_reg = REG_P (XEXP (operands[0], 0))
+		   ? XEXP (operands[0], 0)
+		   : XEXP (XEXP (operands[0], 0), 0);
+  bool push_p =  REGNO (addr_reg) == SP_REGNUM;
 
-  strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
+  if (push_p)
+    strcpy (pattern, "vpush%?.64\t{%P1");
+  else
+    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
+
   p = strlen (pattern);
 
   gcc_assert (REG_P (operands[1]));
@@ -17394,6 +17865,15 @@
       require_pic_register ();
       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
     }
+
+  if (TARGET_AAPCS_BASED)
+    {
+      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
+         linker.  */
+      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
+      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
+      clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM));
+    }
 }
 
 /* Output a 'call' insn.  */
@@ -18073,19 +18553,19 @@
   switch (GET_CODE (addr))
     {
     case PRE_DEC:
-      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
+      templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
       ops[0] = XEXP (addr, 0);
       ops[1] = reg;
       break;
 
     case POST_INC:
-      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
+      templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
       ops[0] = XEXP (addr, 0);
       ops[1] = reg;
       break;
 
     default:
-      templ = "f%s%c%%?\t%%%s0, %%1%s";
+      templ = "v%sr%%?.%s\t%%%s0, %%1%s";
       ops[0] = reg;
       ops[1] = mem;
       break;
@@ -18093,7 +18573,7 @@
 
   sprintf (buff, templ,
 	   load ? "ld" : "st",
-	   dp ? 'd' : 's',
+	   dp ? "64" : "32",
 	   dp ? "P" : "",
 	   integer_p ? "\t%@ int" : "");
   output_asm_insn (buff, ops);
@@ -18781,6 +19261,7 @@
       || (save_reg_mask
 	  && optimize_size
 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+	  && !crtl->tail_call_emit
 	  && !crtl->calls_eh_return))
     save_reg_mask |= 1 << LR_REGNUM;
 
@@ -20441,6 +20922,18 @@
 	{
 	  int reg = -1;
 
+	  /* Register r3 is caller-saved.  Normally it does not need to be
+	     saved on entry by the prologue.  However if we choose to save
+	     it for padding then we may confuse the compiler into thinking
+	     a prologue sequence is required when in fact it is not.  This
+	     will occur when shrink-wrapping if r3 is used as a scratch
+	     register and there are no other callee-saved writes.
+
+	     This situation can be avoided when other callee-saved registers
+	     are available and r3 is not mandatory if we choose a callee-saved
+	     register for padding.  */
+	  bool prefer_callee_reg_p = false;
+
 	  /* If it is safe to use r3, then do so.  This sometimes
 	     generates better code on Thumb-2 by avoiding the need to
 	     use 32-bit push/pop instructions.  */
@@ -20447,24 +20940,29 @@
           if (! any_sibcall_could_use_r3 ()
 	      && arm_size_return_regs () <= 12
 	      && (offsets->saved_regs_mask & (1 << 3)) == 0
-              && (TARGET_THUMB2
+	      && (TARGET_THUMB2
 		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
 	    {
 	      reg = 3;
+	      if (!TARGET_THUMB2)
+		prefer_callee_reg_p = true;
 	    }
-	  else
-	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
-	      {
-		/* Avoid fixed registers; they may be changed at
-		   arbitrary times so it's unsafe to restore them
-		   during the epilogue.  */
-		if (!fixed_regs[i]
-		    && (offsets->saved_regs_mask & (1 << i)) == 0)
-		  {
-		    reg = i;
-		    break;
-		  }
-	      }
+	  if (reg == -1
+	      || prefer_callee_reg_p)
+	    {
+	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
+		{
+		  /* Avoid fixed registers; they may be changed at
+		     arbitrary times so it's unsafe to restore them
+		     during the epilogue.  */
+		  if (!fixed_regs[i]
+		      && (offsets->saved_regs_mask & (1 << i)) == 0)
+		    {
+		      reg = i;
+		      break;
+		    }
+		}
+	    }
 
 	  if (reg != -1)
 	    {
@@ -21054,7 +21552,15 @@
 }
 
 
-/* If CODE is 'd', then the X is a condition operand and the instruction
+/* Globally reserved letters: acln
+   Puncutation letters currently used: @_|?().!#
+   Lower case letters currently used: bcdefhimpqtvwxyz
+   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
+   Letters previously used, but now deprecated/obsolete: sVWXYZ.
+
+   Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
+
+   If CODE is 'd', then the X is a condition operand and the instruction
    should only be executed if the condition is true.
    if CODE is 'D', then the X is a condition operand and the instruction
    should only be executed if the condition is false: however, if the mode
@@ -21194,6 +21700,19 @@
 	}
       return;
 
+    case 'b':
+      /* Print the log2 of a CONST_INT.  */
+      {
+	HOST_WIDE_INT val;
+
+	if (!CONST_INT_P (x)
+	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
+	  output_operand_lossage ("Unsupported operand for code '%c'", code);
+	else
+	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
+      }
+      return;
+
     case 'L':
       /* The low 16 bits of an immediate constant.  */
       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
@@ -21436,7 +21955,7 @@
        register.  */
     case 'p':
       {
-        int mode = GET_MODE (x);
+        enum machine_mode mode = GET_MODE (x);
         int regno;
 
         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
@@ -21460,7 +21979,7 @@
     case 'P':
     case 'q':
       {
-	int mode = GET_MODE (x);
+	enum machine_mode mode = GET_MODE (x);
 	int is_quad = (code == 'q');
 	int regno;
 
@@ -21496,7 +22015,7 @@
     case 'e':
     case 'f':
       {
-        int mode = GET_MODE (x);
+        enum machine_mode mode = GET_MODE (x);
         int regno;
 
         if ((GET_MODE_SIZE (mode) != 16
@@ -21578,6 +22097,7 @@
       {
 	rtx addr;
 	bool postinc = FALSE;
+	rtx postinc_reg = NULL;
 	unsigned align, memsize, align_bits;
 
 	gcc_assert (MEM_P (x));
@@ -21587,6 +22107,11 @@
 	    postinc = 1;
 	    addr = XEXP (addr, 0);
 	  }
+	if (GET_CODE (addr) == POST_MODIFY)
+	  {
+	    postinc_reg = XEXP( XEXP (addr, 1), 1);
+	    addr = XEXP (addr, 0);
+	  }
 	asm_fprintf (stream, "[%r", REGNO (addr));
 
 	/* We know the alignment of this access, so we can emit a hint in the
@@ -21612,6 +22137,8 @@
 
 	if (postinc)
 	  fputs("!", stream);
+	if (postinc_reg)
+	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
       }
       return;
 
@@ -21629,7 +22156,7 @@
     /* Translate an S register number into a D register number and element index.  */
     case 'y':
       {
-        int mode = GET_MODE (x);
+        enum machine_mode mode = GET_MODE (x);
         int regno;
 
         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
@@ -21663,7 +22190,7 @@
        number into a D register number and element index.  */
     case 'z':
       {
-        int mode = GET_MODE (x);
+        enum machine_mode mode = GET_MODE (x);
         int regno;
 
         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
@@ -21703,15 +22230,12 @@
 	  break;
 
 	case CONST_DOUBLE:
-          if (TARGET_NEON)
-            {
-              char fpstr[20];
-              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
-			       sizeof (fpstr), 0, 1);
-              fprintf (stream, "#%s", fpstr);
-            }
-          else
-	    fprintf (stream, "#%s", fp_immediate_constant (x));
+	  {
+            char fpstr[20];
+            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
+			      sizeof (fpstr), 0, 1);
+            fprintf (stream, "#%s", fpstr);
+	  }
 	  break;
 
 	default:
@@ -22579,6 +23103,9 @@
 	    || (TARGET_HARD_FLOAT && TARGET_VFP
 		&& regno == VFPCC_REGNUM));
 
+  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
+    return false;
+
   if (TARGET_THUMB1)
     /* For the Thumb we only allow values bigger than SImode in
        registers 0 - 6, so that there is always a second low
@@ -22624,13 +23151,20 @@
     }
 
   /* We allow almost any value to be stored in the general registers.
-     Restrict doubleword quantities to even register pairs so that we can
-     use ldrd.  Do not allow very large Neon structure opaque modes in
-     general registers; they would use too many.  */
+     Restrict doubleword quantities to even register pairs in ARM state
+     so that we can use ldrd.  Do not allow very large Neon structure
+     opaque modes in general registers; they would use too many.  */
   if (regno <= LAST_ARM_REGNUM)
-    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
-      && ARM_NUM_REGS (mode) <= 4;
+    {
+      if (ARM_NUM_REGS (mode) > 4)
+	  return FALSE;
 
+      if (TARGET_THUMB2)
+	return TRUE;
+
+      return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
+    }
+
   if (regno == FRAME_POINTER_REGNUM
       || regno == ARG_POINTER_REGNUM)
     /* We only allow integers in the fake hard registers.  */
@@ -22668,6 +23202,9 @@
 enum reg_class
 arm_regno_class (int regno)
 {
+  if (regno == PC_REGNUM)
+    return NO_REGS;
+
   if (TARGET_THUMB1)
     {
       if (regno == STACK_POINTER_REGNUM)
@@ -22841,10 +23378,12 @@
   NEON_BINOP,
   NEON_TERNOP,
   NEON_UNOP,
+  NEON_BSWAP,
   NEON_GETLANE,
   NEON_SETLANE,
   NEON_CREATE,
   NEON_RINT,
+  NEON_COPYSIGNF,
   NEON_DUP,
   NEON_DUPLANE,
   NEON_COMBINE,
@@ -22862,7 +23401,6 @@
   NEON_FLOAT_NARROW,
   NEON_FIXCONV,
   NEON_SELECT,
-  NEON_RESULTPAIR,
   NEON_REINTERP,
   NEON_VTBL,
   NEON_VTBX,
@@ -23231,6 +23769,9 @@
   ARM_BUILTIN_CRC32CH,
   ARM_BUILTIN_CRC32CW,
 
+  ARM_BUILTIN_GET_FPSCR,
+  ARM_BUILTIN_SET_FPSCR,
+
 #undef CRYPTO1
 #undef CRYPTO2
 #undef CRYPTO3
@@ -23308,14 +23849,19 @@
 
   tree V8QI_type_node;
   tree V4HI_type_node;
+  tree V4UHI_type_node;
   tree V4HF_type_node;
   tree V2SI_type_node;
+  tree V2USI_type_node;
   tree V2SF_type_node;
   tree V16QI_type_node;
   tree V8HI_type_node;
+  tree V8UHI_type_node;
   tree V4SI_type_node;
+  tree V4USI_type_node;
   tree V4SF_type_node;
   tree V2DI_type_node;
+  tree V2UDI_type_node;
 
   tree intUQI_type_node;
   tree intUHI_type_node;
@@ -23327,27 +23873,6 @@
   tree intCI_type_node;
   tree intXI_type_node;
 
-  tree V8QI_pointer_node;
-  tree V4HI_pointer_node;
-  tree V2SI_pointer_node;
-  tree V2SF_pointer_node;
-  tree V16QI_pointer_node;
-  tree V8HI_pointer_node;
-  tree V4SI_pointer_node;
-  tree V4SF_pointer_node;
-  tree V2DI_pointer_node;
-
-  tree void_ftype_pv8qi_v8qi_v8qi;
-  tree void_ftype_pv4hi_v4hi_v4hi;
-  tree void_ftype_pv2si_v2si_v2si;
-  tree void_ftype_pv2sf_v2sf_v2sf;
-  tree void_ftype_pdi_di_di;
-  tree void_ftype_pv16qi_v16qi_v16qi;
-  tree void_ftype_pv8hi_v8hi_v8hi;
-  tree void_ftype_pv4si_v4si_v4si;
-  tree void_ftype_pv4sf_v4sf_v4sf;
-  tree void_ftype_pv2di_v2di_v2di;
-
   tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
   tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
   tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
@@ -23411,6 +23936,12 @@
   const_intDI_pointer_node = build_pointer_type (const_intDI_node);
   const_float_pointer_node = build_pointer_type (const_float_node);
 
+  /* Unsigned integer types for various mode sizes.  */
+  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
+  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
+  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
+  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
+  neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
   /* Now create vector types based on our NEON element types.  */
   /* 64-bit vectors.  */
   V8QI_type_node =
@@ -23417,10 +23948,14 @@
     build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
   V4HI_type_node =
     build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
+  V4UHI_type_node =
+    build_vector_type_for_mode (intUHI_type_node, V4HImode);
   V4HF_type_node =
     build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
   V2SI_type_node =
     build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
+  V2USI_type_node =
+    build_vector_type_for_mode (intUSI_type_node, V2SImode);
   V2SF_type_node =
     build_vector_type_for_mode (neon_float_type_node, V2SFmode);
   /* 128-bit vectors.  */
@@ -23428,21 +23963,20 @@
     build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
   V8HI_type_node =
     build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
+  V8UHI_type_node =
+    build_vector_type_for_mode (intUHI_type_node, V8HImode);
   V4SI_type_node =
     build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
+  V4USI_type_node =
+    build_vector_type_for_mode (intUSI_type_node, V4SImode);
   V4SF_type_node =
     build_vector_type_for_mode (neon_float_type_node, V4SFmode);
   V2DI_type_node =
     build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
+  V2UDI_type_node =
+    build_vector_type_for_mode (intUDI_type_node, V2DImode);
 
-  /* Unsigned integer types for various mode sizes.  */
-  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
-  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
-  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
-  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
-  neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
 
-
   (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
 					     "__builtin_neon_uqi");
   (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
@@ -23473,53 +24007,8 @@
   (*lang_hooks.types.register_builtin_type) (intXI_type_node,
 					     "__builtin_neon_xi");
 
-  /* Pointers to vector types.  */
-  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
-  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
-  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
-  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
-  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
-  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
-  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
-  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
-  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
-
-  /* Operations which return results as pairs.  */
-  void_ftype_pv8qi_v8qi_v8qi =
-    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
-  			      V8QI_type_node, NULL);
-  void_ftype_pv4hi_v4hi_v4hi =
-    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
-  			      V4HI_type_node, NULL);
-  void_ftype_pv2si_v2si_v2si =
-    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
-  			      V2SI_type_node, NULL);
-  void_ftype_pv2sf_v2sf_v2sf =
-    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
-  			      V2SF_type_node, NULL);
-  void_ftype_pdi_di_di =
-    build_function_type_list (void_type_node, intDI_pointer_node,
-			      neon_intDI_type_node, neon_intDI_type_node, NULL);
-  void_ftype_pv16qi_v16qi_v16qi =
-    build_function_type_list (void_type_node, V16QI_pointer_node,
-			      V16QI_type_node, V16QI_type_node, NULL);
-  void_ftype_pv8hi_v8hi_v8hi =
-    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
-  			      V8HI_type_node, NULL);
-  void_ftype_pv4si_v4si_v4si =
-    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
-  			      V4SI_type_node, NULL);
-  void_ftype_pv4sf_v4sf_v4sf =
-    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
-  			      V4SF_type_node, NULL);
-  void_ftype_pv2di_v2di_v2di =
-    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
-			      V2DI_type_node, NULL);
-
   if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
   {
-    tree V4USI_type_node =
-      build_vector_type_for_mode (intUSI_type_node, V4SImode);
 
     tree V16UQI_type_node =
       build_vector_type_for_mode (intUQI_type_node, V16QImode);
@@ -23805,25 +24294,6 @@
 	  }
 	  break;
 
-	case NEON_RESULTPAIR:
-	  {
-	    switch (insn_data[d->code].operand[1].mode)
-	      {
-	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
-	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
-	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
-	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
-	      case DImode: ftype = void_ftype_pdi_di_di; break;
-	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
-	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
-	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
-	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
-	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
-	      default: gcc_unreachable ();
-	      }
-	  }
-	  break;
-
 	case NEON_REINTERP:
 	  {
 	    /* We iterate over NUM_DREG_TYPES doubleword types,
@@ -23883,6 +24353,47 @@
 	    ftype = build_function_type_list (return_type, eltype, NULL);
 	    break;
 	  }
+	case NEON_BSWAP:
+	{
+	    tree eltype = NULL_TREE;
+	    switch (insn_data[d->code].operand[1].mode)
+	    {
+	      case V4HImode:
+	        eltype = V4UHI_type_node;
+	        break;
+	      case V8HImode:
+	        eltype = V8UHI_type_node;
+	        break;
+	      case V2SImode:
+	        eltype = V2USI_type_node;
+	        break;
+	      case V4SImode:
+	        eltype = V4USI_type_node;
+	        break;
+	      case V2DImode:
+	        eltype = V2UDI_type_node;
+	        break;
+	      default: gcc_unreachable ();
+	    }
+	    ftype = build_function_type_list (eltype, eltype, NULL);
+	    break;
+	}
+	case NEON_COPYSIGNF:
+	  {
+	    tree eltype = NULL_TREE;
+	    switch (insn_data[d->code].operand[1].mode)
+	      {
+	      case V2SFmode:
+		eltype = V2SF_type_node;
+		break;
+	      case V4SFmode:
+		eltype = V4SF_type_node;
+		break;
+	      default: gcc_unreachable ();
+	      }
+	    ftype = build_function_type_list (eltype, eltype, NULL);
+	    break;
+	  }
 	default:
 	  gcc_unreachable ();
 	}
@@ -24029,6 +24540,15 @@
   IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
   IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
 
+
+#define FP_BUILTIN(L, U) \
+  {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
+   UNKNOWN, 0},
+
+  FP_BUILTIN (get_fpscr, GET_FPSCR)
+  FP_BUILTIN (set_fpscr, SET_FPSCR)
+#undef FP_BUILTIN
+
 #define CRC32_BUILTIN(L, U) \
   {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
    UNKNOWN, 0},
@@ -24543,6 +25063,21 @@
 
   if (TARGET_CRC32)
     arm_init_crc32_builtins ();
+
+  if (TARGET_VFP && TARGET_HARD_FLOAT)
+    {
+      tree ftype_set_fpscr
+	= build_function_type_list (void_type_node, unsigned_type_node, NULL);
+      tree ftype_get_fpscr
+	= build_function_type_list (unsigned_type_node, NULL);
+
+      arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
+	= add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
+				ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
+      arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
+	= add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
+				ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
+    }
 }
 
 /* Return the ARM builtin for CODE.  */
@@ -25057,20 +25592,17 @@
     case NEON_SPLIT:
     case NEON_FLOAT_WIDEN:
     case NEON_FLOAT_NARROW:
+    case NEON_BSWAP:
     case NEON_REINTERP:
       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
+    case NEON_COPYSIGNF:
     case NEON_COMBINE:
     case NEON_VTBL:
       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
-    case NEON_RESULTPAIR:
-      return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
-        NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
-        NEON_ARG_STOP);
-
     case NEON_LANEMUL:
     case NEON_LANEMULL:
     case NEON_LANEMULH:
@@ -25132,24 +25664,6 @@
   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
 }
 
-/* Emit code to place a Neon pair result in memory locations (with equal
-   registers).  */
-void
-neon_emit_pair_result_insn (enum machine_mode mode,
-			    rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
-                            rtx op1, rtx op2)
-{
-  rtx mem = gen_rtx_MEM (mode, destaddr);
-  rtx tmp1 = gen_reg_rtx (mode);
-  rtx tmp2 = gen_reg_rtx (mode);
-
-  emit_insn (intfn (tmp1, op1, op2, tmp2));
-
-  emit_move_insn (mem, tmp1);
-  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
-  emit_move_insn (mem, tmp2);
-}
-
 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
    not to early-clobber SRC registers in the process.
 
@@ -25270,6 +25784,25 @@
 
   switch (fcode)
     {
+    case ARM_BUILTIN_GET_FPSCR:
+    case ARM_BUILTIN_SET_FPSCR:
+      if (fcode == ARM_BUILTIN_GET_FPSCR)
+	{
+	  icode = CODE_FOR_get_fpscr;
+	  target = gen_reg_rtx (SImode);
+	  pat = GEN_FCN (icode) (target);
+	}
+      else
+	{
+	  target = NULL_RTX;
+	  icode = CODE_FOR_set_fpscr;
+	  arg0 = CALL_EXPR_ARG (exp, 0);
+	  op0 = expand_normal (arg0);
+	  pat = GEN_FCN (icode) (op0);
+	}
+      emit_insn (pat);
+      return target;
+
     case ARM_BUILTIN_TEXTRMSB:
     case ARM_BUILTIN_TEXTRMUB:
     case ARM_BUILTIN_TEXTRMSH:
@@ -25903,7 +26436,7 @@
   int pops_needed;
   unsigned available;
   unsigned required;
-  int mode;
+  enum machine_mode mode;
   int size;
   int restore_a4 = FALSE;
 
@@ -29423,10 +29956,14 @@
 {
   switch (arm_tune)
     {
+    case xgene1:
+      return 4;
+
     case cortexa15:
     case cortexa57:
       return 3;
 
+    case cortexm7:
     case cortexr4:
     case cortexr4f:
     case cortexr5:
@@ -29436,6 +29973,7 @@
     case cortexa8:
     case cortexa9:
     case cortexa12:
+    case cortexa17:
     case cortexa53:
     case fa726te:
     case marvell_pj4:
@@ -29446,6 +29984,23 @@
     }
 }
 
+/* Return how many instructions should scheduler lookahead to choose the
+   best one.  */
+static int
+arm_first_cycle_multipass_dfa_lookahead (void)
+{
+  int issue_rate = arm_issue_rate ();
+
+  return issue_rate > 1 ? issue_rate : 0;
+}
+
+/* Enable modeling of L2 auto-prefetcher.  */
+static int
+arm_first_cycle_multipass_dfa_lookahead_guard (rtx insn, int ready_index)
+{
+  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
+}
+
 /* A table and a function to perform ARM-specific name mangling for
    NEON vector types in order to conform to the AAPCS (see "Procedure
    Call Standard for the ARM Architecture", Appendix A).  To qualify
@@ -29570,10 +30125,10 @@
 {
   enum machine_mode in_mode, out_mode;
   int in_n, out_n;
+  bool out_unsigned_p = TYPE_UNSIGNED (type_out);
 
   if (TREE_CODE (type_out) != VECTOR_TYPE
-      || TREE_CODE (type_in) != VECTOR_TYPE
-      || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
+      || TREE_CODE (type_in) != VECTOR_TYPE)
     return NULL_TREE;
 
   out_mode = TYPE_MODE (TREE_TYPE (type_out));
@@ -29585,7 +30140,13 @@
    decl of the vectorized builtin for the appropriate vector mode.
    NULL_TREE is returned if no such builtin is available.  */
 #undef ARM_CHECK_BUILTIN_MODE
-#define ARM_CHECK_BUILTIN_MODE(C) \
+#define ARM_CHECK_BUILTIN_MODE(C)    \
+  (TARGET_NEON && TARGET_FPU_ARMV8   \
+   && flag_unsafe_math_optimizations \
+   && ARM_CHECK_BUILTIN_MODE_1 (C))
+
+#undef ARM_CHECK_BUILTIN_MODE_1
+#define ARM_CHECK_BUILTIN_MODE_1(C) \
   (out_mode == SFmode && out_n == C \
    && in_mode == SFmode && in_n == C)
 
@@ -29610,6 +30171,67 @@
             return ARM_FIND_VRINT_VARIANT (vrintz);
           case BUILT_IN_ROUNDF:
             return ARM_FIND_VRINT_VARIANT (vrinta);
+#undef ARM_CHECK_BUILTIN_MODE_1
+#define ARM_CHECK_BUILTIN_MODE_1(C) \
+  (out_mode == SImode && out_n == C \
+   && in_mode == SFmode && in_n == C)
+
+#define ARM_FIND_VCVT_VARIANT(N) \
+  (ARM_CHECK_BUILTIN_MODE (2) \
+   ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
+   : (ARM_CHECK_BUILTIN_MODE (4) \
+     ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
+     : NULL_TREE))
+
+#define ARM_FIND_VCVTU_VARIANT(N) \
+  (ARM_CHECK_BUILTIN_MODE (2) \
+   ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
+   : (ARM_CHECK_BUILTIN_MODE (4) \
+     ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
+     : NULL_TREE))
+          case BUILT_IN_LROUNDF:
+            return out_unsigned_p
+                     ? ARM_FIND_VCVTU_VARIANT (vcvta)
+                     : ARM_FIND_VCVT_VARIANT (vcvta);
+          case BUILT_IN_LCEILF:
+            return out_unsigned_p
+                     ? ARM_FIND_VCVTU_VARIANT (vcvtp)
+                     : ARM_FIND_VCVT_VARIANT (vcvtp);
+          case BUILT_IN_LFLOORF:
+            return out_unsigned_p
+                     ? ARM_FIND_VCVTU_VARIANT (vcvtm)
+                     : ARM_FIND_VCVT_VARIANT (vcvtm);
+#undef ARM_CHECK_BUILTIN_MODE
+#define ARM_CHECK_BUILTIN_MODE(C, N) \
+  (out_mode == N##mode && out_n == C \
+   && in_mode == N##mode && in_n == C)
+          case BUILT_IN_BSWAP16:
+            if (ARM_CHECK_BUILTIN_MODE (4, HI))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
+            else if (ARM_CHECK_BUILTIN_MODE (8, HI))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
+            else
+              return NULL_TREE;
+          case BUILT_IN_BSWAP32:
+            if (ARM_CHECK_BUILTIN_MODE (2, SI))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
+            else if (ARM_CHECK_BUILTIN_MODE (4, SI))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
+            else
+              return NULL_TREE;
+          case BUILT_IN_BSWAP64:
+            if (ARM_CHECK_BUILTIN_MODE (2, DI))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
+            else
+              return NULL_TREE;
+	  case BUILT_IN_COPYSIGNF:
+	    if (ARM_CHECK_BUILTIN_MODE (2, SF))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
+	    else if (ARM_CHECK_BUILTIN_MODE (4, SF))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
+	    else
+	      return NULL_TREE;
+
           default:
             return NULL_TREE;
         }
@@ -29616,9 +30238,12 @@
     }
   return NULL_TREE;
 }
+#undef ARM_FIND_VCVT_VARIANT
+#undef ARM_FIND_VCVTU_VARIANT
 #undef ARM_CHECK_BUILTIN_MODE
 #undef ARM_FIND_VRINT_VARIANT
 
+
 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
 static HOST_WIDE_INT
 arm_vector_alignment (const_tree type)
@@ -31150,6 +31775,72 @@
 
 }
 
+static bool
+arm_macro_fusion_p (void)
+{
+  return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
+}
+
+
+static bool
+aarch_macro_fusion_pair_p (rtx prev, rtx curr)
+{
+  rtx set_dest;
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+
+  if (!prev_set
+      || !curr_set)
+    return false;
+
+  if (any_condjump_p (curr))
+    return false;
+
+  if (!arm_macro_fusion_p ())
+    return false;
+
+  if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
+    {
+      /* We are trying to fuse
+         movw imm / movt imm
+         instructions as a group that gets scheduled together.  */
+
+      set_dest = SET_DEST (curr_set);
+
+      if (GET_MODE (set_dest) != SImode)
+        return false;
+
+      /* We are trying to match:
+         prev (movw)  == (set (reg r0) (const_int imm16))
+         curr (movt) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 16))
+                             (const_int imm16_1))
+         or
+         prev (movw) == (set (reg r1)
+                              (high (symbol_ref ("SYM"))))
+         curr (movt) == (set (reg r0)
+                             (lo_sum (reg r1)
+                                     (symbol_ref ("SYM"))))  */
+      if (GET_CODE (set_dest) == ZERO_EXTRACT)
+        {
+          if (CONST_INT_P (SET_SRC (curr_set))
+              && CONST_INT_P (SET_SRC (prev_set))
+              && REG_P (XEXP (set_dest, 0))
+              && REG_P (SET_DEST (prev_set))
+              && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+            return true;
+        }
+      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+               && REG_P (SET_DEST (curr_set))
+               && REG_P (SET_DEST (prev_set))
+               && GET_CODE (SET_SRC (prev_set)) == HIGH
+               && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
+             return true;
+    }
+  return false;
+}
+
 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
 
 static unsigned HOST_WIDE_INT
@@ -31200,6 +31891,75 @@
   return false;
 }
 
+static void
+arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  const unsigned ARM_FE_INVALID = 1;
+  const unsigned ARM_FE_DIVBYZERO = 2;
+  const unsigned ARM_FE_OVERFLOW = 4;
+  const unsigned ARM_FE_UNDERFLOW = 8;
+  const unsigned ARM_FE_INEXACT = 16;
+  const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
+						    | ARM_FE_DIVBYZERO
+						    | ARM_FE_OVERFLOW
+						    | ARM_FE_UNDERFLOW
+						    | ARM_FE_INEXACT);
+  const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
+  tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
+  tree new_fenv_var, reload_fenv, restore_fnenv;
+  tree update_call, atomic_feraiseexcept, hold_fnclex;
+
+  if (!TARGET_VFP || !TARGET_HARD_FLOAT)
+    return;
+
+  /* Generate the equivalent of :
+       unsigned int fenv_var;
+       fenv_var = __builtin_arm_get_fpscr ();
+
+       unsigned int masked_fenv;
+       masked_fenv = fenv_var & mask;
+
+       __builtin_arm_set_fpscr (masked_fenv);  */
+
+  fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
+  set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
+  mask = build_int_cst (unsigned_type_node,
+			~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
+			  | ARM_FE_ALL_EXCEPT));
+  ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
+		    fenv_var, build_call_expr (get_fpscr, 0));
+  masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
+  hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
+  *hold = build2 (COMPOUND_EXPR, void_type_node,
+		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+		  hold_fnclex);
+
+  /* Store the value of masked_fenv to clear the exceptions:
+     __builtin_arm_set_fpscr (masked_fenv);  */
+
+  *clear = build_call_expr (set_fpscr, 1, masked_fenv);
+
+  /* Generate the equivalent of :
+       unsigned int new_fenv_var;
+       new_fenv_var = __builtin_arm_get_fpscr ();
+
+       __builtin_arm_set_fpscr (fenv_var);
+
+       __atomic_feraiseexcept (new_fenv_var);  */
+
+  new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
+  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
+			build_call_expr (get_fpscr, 0));
+  restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
+  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+  update_call = build_call_expr (atomic_feraiseexcept, 1,
+				 fold_convert (integer_type_node, new_fenv_var));
+  *update = build2 (COMPOUND_EXPR, void_type_node,
+		    build2 (COMPOUND_EXPR, void_type_node,
+			    reload_fenv, restore_fnenv), update_call);
+}
+
 /* return TRUE if x is a reference to a value in a constant pool */
 extern bool
 arm_is_constant_pool_ref (rtx x)
--- a/src/gcc/config/arm/t-aprofile
+++ b/src/gcc/config/arm/t-aprofile
@@ -83,10 +83,14 @@
 MULTILIB_MATCHES       += march?armv7-a=mcpu?cortex-a5
 MULTILIB_MATCHES       += march?armv7ve=mcpu?cortex-a15
 MULTILIB_MATCHES       += march?armv7ve=mcpu?cortex-a12
+MULTILIB_MATCHES       += march?armv7ve=mcpu?cortex-a17
 MULTILIB_MATCHES       += march?armv7ve=mcpu?cortex-a15.cortex-a7
+MULTILIB_MATCHES       += march?armv7ve=mcpu?cortex-a17.cortex-a7
 MULTILIB_MATCHES       += march?armv8-a=mcpu?cortex-a53
 MULTILIB_MATCHES       += march?armv8-a=mcpu?cortex-a57
 MULTILIB_MATCHES       += march?armv8-a=mcpu?cortex-a57.cortex-a53
+MULTILIB_MATCHES       += march?armv8-a=mcpu?cortex-a72
+MULTILIB_MATCHES       += march?armv8-a=mcpu?cortex-a72.cortex-a53
 
 # Arch Matches
 MULTILIB_MATCHES       += march?armv8-a=march?armv8-a+crc
--- a/src/gcc/config/arm/arm.h
+++ b/src/gcc/config/arm/arm.h
@@ -166,7 +166,10 @@
 	    builtin_define ("__ARM_EABI__");		\
 	  }						\
 	if (TARGET_IDIV)				\
-	  builtin_define ("__ARM_ARCH_EXT_IDIV__");	\
+         {						\
+            builtin_define ("__ARM_ARCH_EXT_IDIV__");	\
+            builtin_define ("__ARM_FEATURE_IDIV");	\
+         }						\
     } while (0)
 
 #include "config/arm/arm-opts.h"
@@ -298,6 +301,9 @@
 /* FPU supports VFPv3 instructions.  */
 #define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3)
 
+/* FPU supports FPv5 instructions.  */
+#define TARGET_VFP5 (TARGET_VFP && arm_fpu_desc->rev >= 5)
+
 /* FPU only supports VFP single-precision instructions.  */
 #define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE)
 
@@ -442,9 +448,6 @@
 #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
 #endif
 
-#define LARGEST_EXPONENT_IS_NORMAL(bits) \
-    ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
-
 #ifndef ARM_DEFAULT_ABI
 #define ARM_DEFAULT_ABI ARM_ABI_APCS
 #endif
@@ -2139,9 +2142,9 @@
    : reverse_condition (code))
 
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 
 #define CC_STATUS_INIT \
   do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0)
--- a/src/gcc/config/arm/cortex-a9-neon.md
+++ b/src/gcc/config/arm/cortex-a9-neon.md
@@ -376,7 +376,7 @@
 (define_insn_reservation "cortex_a9_neon_vmov" 3
   (and (eq_attr "tune" "cortexa9")
        (eq_attr "cortex_a9_neon_type" "neon_vmov"))
-  "cortex_a8_neon_dp")
+  "cortex_a9_neon_dp")
 
 ;; Instructions using this reservation read their (D|Q)n operands at N2,
 ;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
--- a/src/gcc/config/arm/unspecs.md
+++ b/src/gcc/config/arm/unspecs.md
@@ -143,6 +143,8 @@
   VUNSPEC_SLX		; Represent a store-register-release-exclusive.
   VUNSPEC_LDA		; Represent a store-register-acquire.
   VUNSPEC_STL		; Represent a store-register-release.
+  VUNSPEC_GET_FPSCR	; Represent fetch of FPSCR content.
+  VUNSPEC_SET_FPSCR	; Represent assign of FPSCR content.
 ])
 
 ;; Enumerators for NEON unspecs.
--- a/src/gcc/config/arm/cortex-m4.md
+++ b/src/gcc/config/arm/cortex-m4.md
@@ -34,7 +34,7 @@
        (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                              alu_reg,alus_reg,logic_reg,logics_reg,\
                              adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                             adr,bfm,rev,\
+                             adr,bfm,clz,rbit,rev,\
                              shift_imm,shift_reg,extend,\
                              alu_shift_imm,alus_shift_imm,\
                              logic_shift_imm,logics_shift_imm,\
--- a/src/gcc/config/arm/arm-modes.def
+++ b/src/gcc/config/arm/arm-modes.def
@@ -21,9 +21,6 @@
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
-/* Extended precision floating point.
-   FIXME What format is this?  */
-FLOAT_MODE (XF, 12, 0);
 
 /* Half-precision floating point */
 FLOAT_MODE (HF, 2, 0);
--- a/src/gcc/config/arm/arm-cores.def
+++ b/src/gcc/config/arm/arm-cores.def
@@ -137,18 +137,25 @@
 ARM_CORE("cortex-m0",		cortexm0, cortexm0,		6M, FL_LDSCHED, v6m)
 ARM_CORE("cortex-m0plus",	cortexm0plus, cortexm0plus,	6M, FL_LDSCHED, v6m)
 
+/* V6M Architecture Processors for small-multiply implementations.  */
+ARM_CORE("cortex-m1.small-multiply",	cortexm1smallmultiply, cortexm1,	6M, FL_LDSCHED | FL_SMALLMUL, v6m)
+ARM_CORE("cortex-m0.small-multiply",	cortexm0smallmultiply, cortexm0,	6M, FL_LDSCHED | FL_SMALLMUL, v6m)
+ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply, cortexm0plus,6M, FL_LDSCHED | FL_SMALLMUL, v6m)
+
 /* V7 Architecture Processors */
 ARM_CORE("generic-armv7-a",	genericv7a, genericv7a,		7A,  FL_LDSCHED, cortex)
 ARM_CORE("cortex-a5",		cortexa5, cortexa5,		7A,  FL_LDSCHED, cortex_a5)
 ARM_CORE("cortex-a7",		cortexa7, cortexa7,		7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7)
-ARM_CORE("cortex-a8",		cortexa8, cortexa8,		7A,  FL_LDSCHED, cortex)
+ARM_CORE("cortex-a8",		cortexa8, cortexa8,		7A,  FL_LDSCHED, cortex_a8)
 ARM_CORE("cortex-a9",		cortexa9, cortexa9,		7A,  FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a12",	  	cortexa12, cortexa15,		7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
+ARM_CORE("cortex-a12",	  	cortexa12, cortexa17,		7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
 ARM_CORE("cortex-a15",		cortexa15, cortexa15,		7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
+ARM_CORE("cortex-a17",		cortexa17, cortexa17,		7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
 ARM_CORE("cortex-r4",		cortexr4, cortexr4,		7R,  FL_LDSCHED, cortex)
 ARM_CORE("cortex-r4f",		cortexr4f, cortexr4f,		7R,  FL_LDSCHED, cortex)
 ARM_CORE("cortex-r5",		cortexr5, cortexr5,		7R,  FL_LDSCHED | FL_ARM_DIV, cortex)
 ARM_CORE("cortex-r7",		cortexr7, cortexr7,		7R,  FL_LDSCHED | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-m7",		cortexm7, cortexm7,		7EM, FL_LDSCHED, cortex_m7)
 ARM_CORE("cortex-m4",		cortexm4, cortexm4,		7EM, FL_LDSCHED, v7m)
 ARM_CORE("cortex-m3",		cortexm3, cortexm3,		7M,  FL_LDSCHED, v7m)
 ARM_CORE("marvell-pj4",		marvell_pj4, marvell_pj4,	7A,  FL_LDSCHED, 9e)
@@ -155,10 +162,14 @@
 
 /* V7 big.LITTLE implementations */
 ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7,	7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
+ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7,	7A,  FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
 
 /* V8 Architecture Processors */
 ARM_CORE("cortex-a53",	cortexa53, cortexa53,	8A, FL_LDSCHED | FL_CRC32, cortex_a53)
-ARM_CORE("cortex-a57",	cortexa57, cortexa15,	8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("cortex-a57",	cortexa57, cortexa57,	8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("cortex-a72",	cortexa72, cortexa57,	8A, FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("xgene1",      xgene1,    xgene1,      8A, FL_LDSCHED,            xgene1)
 
 /* V8 big.LITTLE implementations */
 ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A,  FL_LDSCHED | FL_CRC32, cortex_a57)
+ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A,  FL_LDSCHED | FL_CRC32, cortex_a57)
--- a/src/gcc/config/arm/cortex-r4.md
+++ b/src/gcc/config/arm/cortex-r4.md
@@ -81,7 +81,7 @@
        (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                         alu_reg,alus_reg,logic_reg,logics_reg,\
                         adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                        adr,bfm,rev,\
+                        adr,bfm,clz,rbit,rev,\
                         shift_imm,shift_reg,mvn_imm,mvn_reg"))
   "cortex_r4_alu")
 
--- a/src/gcc/config/arm/arm-tune.md
+++ b/src/gcc/config/arm/arm-tune.md
@@ -25,10 +25,13 @@
 	arm1176jzs,arm1176jzfs,mpcorenovfp,
 	mpcore,arm1156t2s,arm1156t2fs,
 	cortexm1,cortexm0,cortexm0plus,
+	cortexm1smallmultiply,cortexm0smallmultiply,cortexm0plussmallmultiply,
 	genericv7a,cortexa5,cortexa7,
 	cortexa8,cortexa9,cortexa12,
-	cortexa15,cortexr4,cortexr4f,
-	cortexr5,cortexr7,cortexm4,
-	cortexm3,marvell_pj4,cortexa15cortexa7,
-	cortexa53,cortexa57,cortexa57cortexa53"
+	cortexa15,cortexa17,cortexr4,
+	cortexr4f,cortexr5,cortexr7,
+	cortexm7,cortexm4,cortexm3,
+	marvell_pj4,cortexa15cortexa7,cortexa17cortexa7,
+	cortexa53,cortexa57,cortexa72,
+	xgene1,cortexa57cortexa53,cortexa72cortexa53"
 	(const (symbol_ref "((enum attr_tune) arm_tune)")))
--- a/src/gcc/config/arm/arm-protos.h
+++ b/src/gcc/config/arm/arm-protos.h
@@ -126,7 +126,6 @@
 extern int arm_const_double_inline_cost (rtx);
 extern bool arm_const_double_by_parts (rtx);
 extern bool arm_const_double_by_immediates (rtx);
-extern const char *fp_immediate_constant (rtx);
 extern void arm_emit_call_insn (rtx, rtx);
 extern const char *output_call (rtx *);
 extern const char *output_call_mem (rtx *);
@@ -150,7 +149,7 @@
 extern int    arm_emit_vector_const (FILE *, rtx);
 extern void arm_emit_fp16_const (rtx c);
 extern const char * arm_output_load_gr (rtx *);
-extern const char *vfp_output_fstmd (rtx *);
+extern const char *vfp_output_vstmd (rtx *);
 extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool);
 extern void arm_set_return_address (rtx, rtx);
 extern int arm_eliminable_register (rtx);
@@ -250,6 +249,13 @@
 
 struct cpu_cost_table;
 
+enum arm_sched_autopref
+  {
+    ARM_SCHED_AUTOPREF_OFF,
+    ARM_SCHED_AUTOPREF_RANK,
+    ARM_SCHED_AUTOPREF_FULL
+  };
+
 struct tune_params
 {
   bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
@@ -273,6 +279,15 @@
   const struct cpu_vec_costs* vec_costs;
   /* Prefer Neon for 64-bit bitops.  */
   bool prefer_neon_for_64bits;
+  /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding.  */
+  bool disparage_flag_setting_t16_encodings;
+  /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags
+     would be set.  */
+  bool disparage_partial_flag_setting_t16_encodings;
+  /* Depth of scheduling queue to check for L2 autoprefetcher.  */
+  enum arm_sched_autopref sched_autopref;
+  /* Bitfield encoding the fuseable pairs of instructions.  */
+  unsigned int fuseable_ops;
 };
 
 extern const struct tune_params *current_tune;
--- a/src/gcc/config/arm/vfp.md
+++ b/src/gcc/config/arm/vfp.md
@@ -41,11 +41,11 @@
     case 5:
       return \"str%?\\t%1, %0\";
     case 6:
-      return \"fmsr%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 7:
-      return \"fmrs%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 8:
-      return \"fcpys%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?.f32\\t%0, %1\\t%@ int\";
     case 9: case 10:
       return output_move_vfp (operands);
     default:
@@ -87,11 +87,11 @@
     case 8:
       return \"str%?\\t%1, %0\";
     case 9:
-      return \"fmsr%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 10:
-      return \"fmrs%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 11:
-      return \"fcpys%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?.f32\\t%0, %1\\t%@ int\";
     case 12: case 13:
       return output_move_vfp (operands);
     default:
@@ -100,7 +100,7 @@
   "
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
-   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores")
+   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores")
    (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
    (set_attr "pool_range"     "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
    (set_attr "neg_pool_range" "*,*,*,*,*,   0,   0,*,*,*,*,*,1008,*")]
@@ -130,14 +130,14 @@
     case 6:
       return output_move_double (operands, true, NULL);
     case 7:
-      return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
+      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
     case 8:
-      return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
+      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
     case 9:
       if (TARGET_VFP_SINGLE)
-	return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
+	return \"vmov%?.f32\\t%0, %1\\t%@ int\;vmov%?.f32\\t%p0, %p1\\t%@ int\";
       else
-	return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
+	return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
     case 10: case 11:
       return output_move_vfp (operands);
     default:
@@ -181,11 +181,11 @@
     case 6:
       return output_move_double (operands, true, NULL);
     case 7:
-      return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
+      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
     case 8:
-      return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
+      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
     case 9:
-      return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
+      return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
     case 10: case 11:
       return output_move_vfp (operands);
     default:
@@ -229,13 +229,13 @@
     case 3:     /* memory from ARM register */
       return \"strh\\t%1, %0\\t%@ __fp16\";
     case 4:	/* S register from S register */
-      return \"fcpys\\t%0, %1\";
+      return \"vmov.f32\\t%0, %1\";
     case 5:	/* ARM register from ARM register */
       return \"mov\\t%0, %1\\t%@ __fp16\";
     case 6:	/* S register from ARM register */
-      return \"fmsr\\t%0, %1\";
+      return \"vmov\\t%0, %1\";
     case 7:	/* ARM register from S register */
-      return \"fmrs\\t%0, %1\";
+      return \"vmov\\t%0, %1\";
     case 8:	/* ARM register from constant */
       {
         REAL_VALUE_TYPE r;
@@ -280,13 +280,13 @@
     case 1:     /* memory from ARM register */
       return \"strh\\t%1, %0\\t%@ __fp16\";
     case 2:	/* S register from S register */
-      return \"fcpys\\t%0, %1\";
+      return \"vmov.f32\\t%0, %1\";
     case 3:	/* ARM register from ARM register */
       return \"mov\\t%0, %1\\t%@ __fp16\";
     case 4:	/* S register from ARM register */
-      return \"fmsr\\t%0, %1\";
+      return \"vmov\\t%0, %1\";
     case 5:	/* ARM register from S register */
-      return \"fmrs\\t%0, %1\";
+      return \"vmov\\t%0, %1\";
     case 6:	/* ARM register from constant */
       {
         REAL_VALUE_TYPE r;
@@ -322,7 +322,7 @@
 
 (define_insn "*movsf_vfp"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t  ,Uv,r ,m,t,r")
-	(match_operand:SF 1 "general_operand"	   " ?r,t,Dv,UvE,t, mE,r,t,r"))]
+        (match_operand:SF 1 "general_operand"	   " ?r,t,Dv,UvE,t, mE,r,t,r"))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
    && (   s_register_operand (operands[0], SFmode)
        || s_register_operand (operands[1], SFmode))"
@@ -330,11 +330,11 @@
   switch (which_alternative)
     {
     case 0:
-      return \"fmsr%?\\t%0, %1\";
+      return \"vmov%?\\t%0, %1\";
     case 1:
-      return \"fmrs%?\\t%0, %1\";
+      return \"vmov%?\\t%0, %1\";
     case 2:
-      return \"fconsts%?\\t%0, #%G1\";
+      return \"vmov%?.f32\\t%0, %1\";
     case 3: case 4:
       return output_move_vfp (operands);
     case 5:
@@ -342,7 +342,7 @@
     case 6:
       return \"str%?\\t%1, %0\\t%@ float\";
     case 7:
-      return \"fcpys%?\\t%0, %1\";
+      return \"vmov%?.f32\\t%0, %1\";
     case 8:
       return \"mov%?\\t%0, %1\\t%@ float\";
     default:
@@ -366,11 +366,11 @@
   switch (which_alternative)
     {
     case 0:
-      return \"fmsr%?\\t%0, %1\";
+      return \"vmov%?\\t%0, %1\";
     case 1:
-      return \"fmrs%?\\t%0, %1\";
+      return \"vmov%?\\t%0, %1\";
     case 2:
-      return \"fconsts%?\\t%0, #%G1\";
+      return \"vmov%?.f32\\t%0, %1\";
     case 3: case 4:
       return output_move_vfp (operands);
     case 5:
@@ -378,7 +378,7 @@
     case 6:
       return \"str%?\\t%1, %0\\t%@ float\";
     case 7:
-      return \"fcpys%?\\t%0, %1\";
+      return \"vmov%?.f32\\t%0, %1\";
     case 8:
       return \"mov%?\\t%0, %1\\t%@ float\";
     default:
@@ -406,12 +406,12 @@
     switch (which_alternative)
       {
       case 0:
-	return \"fmdrr%?\\t%P0, %Q1, %R1\";
+	return \"vmov%?\\t%P0, %Q1, %R1\";
       case 1:
-	return \"fmrrd%?\\t%Q0, %R0, %P1\";
+	return \"vmov%?\\t%Q0, %R0, %P1\";
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
-        return \"fconstd%?\\t%P0, #%G1\";
+        return \"vmov%?.f64\\t%P0, %1\";
       case 3: case 4:
 	return output_move_vfp (operands);
       case 5: case 6:
@@ -418,9 +418,9 @@
 	return output_move_double (operands, true, NULL);
       case 7:
 	if (TARGET_VFP_SINGLE)
-	  return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
+	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
-	  return \"fcpyd%?\\t%P0, %P1\";
+	  return \"vmov%?.f64\\t%P0, %P1\";
       case 8:
         return \"#\";
       default:
@@ -453,12 +453,12 @@
     switch (which_alternative)
       {
       case 0:
-	return \"fmdrr%?\\t%P0, %Q1, %R1\";
+	return \"vmov%?\\t%P0, %Q1, %R1\";
       case 1:
-	return \"fmrrd%?\\t%Q0, %R0, %P1\";
+	return \"vmov%?\\t%Q0, %R0, %P1\";
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
-	return \"fconstd%?\\t%P0, #%G1\";
+	return \"vmov%?.f64\\t%P0, %1\";
       case 3: case 4:
 	return output_move_vfp (operands);
       case 5: case 6: case 8:
@@ -465,9 +465,9 @@
 	return output_move_double (operands, true, NULL);
       case 7:
 	if (TARGET_VFP_SINGLE)
-	  return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
+	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
-	  return \"fcpyd%?\\t%P0, %P1\";
+	  return \"vmov%?.f64\\t%P0, %P1\";
       default:
 	abort ();
       }
@@ -498,15 +498,15 @@
 	  (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
   "@
-   fcpys%D3\\t%0, %2
-   fcpys%d3\\t%0, %1
-   fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
-   fmsr%D3\\t%0, %2
-   fmsr%d3\\t%0, %1
-   fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
-   fmrs%D3\\t%0, %2
-   fmrs%d3\\t%0, %1
-   fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
+   vmov%D3.f32\\t%0, %2
+   vmov%d3.f32\\t%0, %1
+   vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1
+   vmov%D3\\t%0, %2
+   vmov%d3\\t%0, %1
+   vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
+   vmov%D3\\t%0, %2
+   vmov%d3\\t%0, %1
+   vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
     (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
@@ -521,15 +521,15 @@
 	  (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
   "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it"
   "@
-   it\\t%D3\;fcpys%D3\\t%0, %2
-   it\\t%d3\;fcpys%d3\\t%0, %1
-   ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
-   it\\t%D3\;fmsr%D3\\t%0, %2
-   it\\t%d3\;fmsr%d3\\t%0, %1
-   ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
-   it\\t%D3\;fmrs%D3\\t%0, %2
-   it\\t%d3\;fmrs%d3\\t%0, %1
-   ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
+   it\\t%D3\;vmov%D3.f32\\t%0, %2
+   it\\t%d3\;vmov%d3.f32\\t%0, %1
+   ite\\t%D3\;vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1
+   it\\t%D3\;vmov%D3\\t%0, %2
+   it\\t%d3\;vmov%d3\\t%0, %1
+   ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
+   it\\t%D3\;vmov%D3\\t%0, %2
+   it\\t%d3\;vmov%d3\\t%0, %1
+   ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
     (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
@@ -544,15 +544,15 @@
 	  (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fcpyd%D3\\t%P0, %P2
-   fcpyd%d3\\t%P0, %P1
-   fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
-   fmdrr%D3\\t%P0, %Q2, %R2
-   fmdrr%d3\\t%P0, %Q1, %R1
-   fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
-   fmrrd%D3\\t%Q0, %R0, %P2
-   fmrrd%d3\\t%Q0, %R0, %P1
-   fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
+   vmov%D3.f64\\t%P0, %P2
+   vmov%d3.f64\\t%P0, %P1
+   vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1
+   vmov%D3\\t%P0, %Q2, %R2
+   vmov%d3\\t%P0, %Q1, %R1
+   vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1
+   vmov%D3\\t%Q0, %R0, %P2
+   vmov%d3\\t%Q0, %R0, %P1
+   vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
     (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")]
@@ -567,15 +567,15 @@
 	  (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
   "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
   "@
-   it\\t%D3\;fcpyd%D3\\t%P0, %P2
-   it\\t%d3\;fcpyd%d3\\t%P0, %P1
-   ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
-   it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2
-   it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1
-   ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
-   it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2
-   it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1
-   ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
+   it\\t%D3\;vmov%D3.f64\\t%P0, %P2
+   it\\t%d3\;vmov%d3.f64\\t%P0, %P1
+   ite\\t%D3\;vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1
+   it\t%D3\;vmov%D3\\t%P0, %Q2, %R2
+   it\t%d3\;vmov%d3\\t%P0, %Q1, %R1
+   ite\\t%D3\;vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1
+   it\t%D3\;vmov%D3\\t%Q0, %R0, %P2
+   it\t%d3\;vmov%d3\\t%Q0, %R0, %P1
+   ite\\t%D3\;vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
     (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")]
@@ -588,7 +588,7 @@
   [(set (match_operand:SF	  0 "s_register_operand" "=t")
 	(abs:SF (match_operand:SF 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fabss%?\\t%0, %1"
+  "vabs%?.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "ffariths")]
@@ -598,7 +598,7 @@
   [(set (match_operand:DF	  0 "s_register_operand" "=w")
 	(abs:DF (match_operand:DF 1 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fabsd%?\\t%P0, %P1"
+  "vabs%?.f64\\t%P0, %P1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "ffarithd")]
@@ -609,7 +609,7 @@
 	(neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "@
-   fnegs%?\\t%0, %1
+   vneg%?.f32\\t%0, %1
    eor%?\\t%0, %1, #-2147483648"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
@@ -621,7 +621,7 @@
 	(neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fnegd%?\\t%P0, %P1
+   vneg%?.f64\\t%P0, %P1
    #
    #"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed
@@ -671,7 +671,7 @@
 	(plus:SF (match_operand:SF 1 "s_register_operand" "t")
 		 (match_operand:SF 2 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fadds%?\\t%0, %1, %2"
+  "vadd%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fadds")]
@@ -682,7 +682,7 @@
 	(plus:DF (match_operand:DF 1 "s_register_operand" "w")
 		 (match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "faddd%?\\t%P0, %P1, %P2"
+  "vadd%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "faddd")]
@@ -694,7 +694,7 @@
 	(minus:SF (match_operand:SF 1 "s_register_operand" "t")
 		  (match_operand:SF 2 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fsubs%?\\t%0, %1, %2"
+  "vsub%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fadds")]
@@ -705,7 +705,7 @@
 	(minus:DF (match_operand:DF 1 "s_register_operand" "w")
 		  (match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fsubd%?\\t%P0, %P1, %P2"
+  "vsub%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "faddd")]
@@ -719,7 +719,7 @@
 	(div:SF (match_operand:SF 1 "s_register_operand" "t")
 		(match_operand:SF 2 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fdivs%?\\t%0, %1, %2"
+  "vdiv%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fdivs")]
@@ -730,7 +730,7 @@
 	(div:DF (match_operand:DF 1 "s_register_operand" "w")
 		(match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fdivd%?\\t%P0, %P1, %P2"
+  "vdiv%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fdivd")]
@@ -744,7 +744,7 @@
 	(mult:SF (match_operand:SF 1 "s_register_operand" "t")
 		 (match_operand:SF 2 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmuls%?\\t%0, %1, %2"
+  "vmul%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuls")]
@@ -755,7 +755,7 @@
 	(mult:DF (match_operand:DF 1 "s_register_operand" "w")
 		 (match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fmuld%?\\t%P0, %P1, %P2"
+  "vmul%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuld")]
@@ -766,7 +766,7 @@
 	(mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t"))
 		 (match_operand:SF	   2 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && !flag_rounding_math"
-  "fnmuls%?\\t%0, %1, %2"
+  "vnmul%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuls")]
@@ -777,7 +777,7 @@
 	(neg:SF (mult:SF (match_operand:SF 1 "s_register_operand" "t")
 		 (match_operand:SF	   2 "s_register_operand" "t"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fnmuls%?\\t%0, %1, %2"
+  "vnmul%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuls")]
@@ -789,7 +789,7 @@
 		 (match_operand:DF	   2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
   && !flag_rounding_math"
-  "fnmuld%?\\t%P0, %P1, %P2"
+  "vnmul%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuld")]
@@ -800,7 +800,7 @@
 	(neg:DF (mult:DF (match_operand:DF 1 "s_register_operand" "w")
 		 (match_operand:DF	   2 "s_register_operand" "w"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fnmuld%?\\t%P0, %P1, %P2"
+  "vnmul%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuld")]
@@ -816,7 +816,7 @@
 			  (match_operand:SF 3 "s_register_operand" "t"))
 		 (match_operand:SF	    1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmacs%?\\t%0, %2, %3"
+  "vmla%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
@@ -828,7 +828,7 @@
 			  (match_operand:DF 3 "s_register_operand" "w"))
 		 (match_operand:DF	    1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fmacd%?\\t%P0, %P2, %P3"
+  "vmla%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
@@ -841,7 +841,7 @@
 			   (match_operand:SF 3 "s_register_operand" "t"))
 		  (match_operand:SF	     1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmscs%?\\t%0, %2, %3"
+  "vnmls%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
@@ -853,7 +853,7 @@
 			   (match_operand:DF 3 "s_register_operand" "w"))
 		  (match_operand:DF	     1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fmscd%?\\t%P0, %P2, %P3"
+  "vnmls%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
@@ -866,7 +866,7 @@
 		  (mult:SF (match_operand:SF 2 "s_register_operand" "t")
 			   (match_operand:SF 3 "s_register_operand" "t"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fnmacs%?\\t%0, %2, %3"
+  "vmls%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
@@ -878,7 +878,7 @@
 		  (mult:DF (match_operand:DF 2 "s_register_operand" "w")
 			   (match_operand:DF 3 "s_register_operand" "w"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fnmacd%?\\t%P0, %P2, %P3"
+  "vmls%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
@@ -893,7 +893,7 @@
 		    (match_operand:SF	      3 "s_register_operand" "t"))
 		  (match_operand:SF	      1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fnmscs%?\\t%0, %2, %3"
+  "vnmla%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
@@ -906,7 +906,7 @@
 		    (match_operand:DF	      3 "s_register_operand" "w"))
 		  (match_operand:DF	      1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fnmscd%?\\t%P0, %P2, %P3"
+  "vnmla%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
@@ -971,7 +971,7 @@
   [(set (match_operand:DF		   0 "s_register_operand" "=w")
 	(float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fcvtds%?\\t%P0, %1"
+  "vcvt%?.f64.f32\\t%P0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvt")]
@@ -981,7 +981,7 @@
   [(set (match_operand:SF		   0 "s_register_operand" "=t")
 	(float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fcvtsd%?\\t%0, %P1"
+  "vcvt%?.f32.f64\\t%0, %P1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvt")]
@@ -1011,7 +1011,7 @@
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "ftosizs%?\\t%0, %1"
+  "vcvt%?.s32.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvtf2i")]
@@ -1021,7 +1021,7 @@
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "ftosizd%?\\t%0, %P1"
+  "vcvt%?.s32.f64\\t%0, %P1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvtf2i")]
@@ -1032,7 +1032,7 @@
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "ftouizs%?\\t%0, %1"
+  "vcvt%?.u32.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvtf2i")]
@@ -1042,7 +1042,7 @@
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "ftouizd%?\\t%0, %P1"
+  "vcvt%?.u32.f64\\t%0, %P1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvtf2i")]
@@ -1053,7 +1053,7 @@
   [(set (match_operand:SF	    0 "s_register_operand" "=t")
 	(float:SF (match_operand:SI 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fsitos%?\\t%0, %1"
+  "vcvt%?.f32.s32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvti2f")]
@@ -1063,7 +1063,7 @@
   [(set (match_operand:DF	    0 "s_register_operand" "=w")
 	(float:DF (match_operand:SI 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fsitod%?\\t%P0, %1"
+  "vcvt%?.f64.s32\\t%P0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvti2f")]
@@ -1074,7 +1074,7 @@
   [(set (match_operand:SF	    0 "s_register_operand" "=t")
 	(unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fuitos%?\\t%0, %1"
+  "vcvt%?.f32.u32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvti2f")]
@@ -1084,7 +1084,7 @@
   [(set (match_operand:DF	    0 "s_register_operand" "=w")
 	(unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fuitod%?\\t%P0, %1"
+  "vcvt%?.f64.u32\\t%P0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvti2f")]
@@ -1097,7 +1097,7 @@
   [(set (match_operand:SF	   0 "s_register_operand" "=t")
 	(sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fsqrts%?\\t%0, %1"
+  "vsqrt%?.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fsqrts")]
@@ -1107,7 +1107,7 @@
   [(set (match_operand:DF	   0 "s_register_operand" "=w")
 	(sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fsqrtd%?\\t%P0, %P1"
+  "vsqrt%?.f64\\t%P0, %P1"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fsqrtd")]
@@ -1120,7 +1120,7 @@
   [(set (reg CC_REGNUM)
 	(reg VFPCC_REGNUM))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmstat%?"
+  "vmrs%?\\tAPSR_nzcv, FPSCR"
   [(set_attr "conds" "set")
    (set_attr "type" "f_flag")]
 )
@@ -1188,6 +1188,9 @@
 
 ;; Comparison patterns
 
+;; In the compare with FP zero case the ARM Architecture Reference Manual
+;; specifies the immediate to be #0.0.  However, some buggy assemblers only
+;; accept #0.  We don't want to autodetect broken assemblers, so output #0.
 (define_insn "*cmpsf_vfp"
   [(set (reg:CCFP VFPCC_REGNUM)
 	(compare:CCFP (match_operand:SF 0 "s_register_operand"  "t,t")
@@ -1194,8 +1197,8 @@
 		      (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "@
-   fcmps%?\\t%0, %1
-   fcmpzs%?\\t%0"
+   vcmp%?.f32\\t%0, %1
+   vcmp%?.f32\\t%0, #0"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmps")]
@@ -1207,8 +1210,8 @@
 		       (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "@
-   fcmpes%?\\t%0, %1
-   fcmpezs%?\\t%0"
+   vcmpe%?.f32\\t%0, %1
+   vcmpe%?.f32\\t%0, #0"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmps")]
@@ -1220,8 +1223,8 @@
 		      (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fcmpd%?\\t%P0, %P1
-   fcmpzd%?\\t%P0"
+   vcmp%?.f64\\t%P0, %P1
+   vcmp%?.f64\\t%P0, #0"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmpd")]
@@ -1233,8 +1236,8 @@
 		       (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fcmped%?\\t%P0, %P1
-   fcmpezd%?\\t%P0"
+   vcmpe%?.f64\\t%P0, %P1
+   vcmpe%?.f64\\t%P0, #0"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmpd")]
@@ -1295,7 +1298,7 @@
 	  (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
 		      UNSPEC_PUSH_MULT))])]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "* return vfp_output_fstmd (operands);"
+  "* return vfp_output_vstmd (operands);"
   [(set_attr "type" "f_stored")]
 )
 
@@ -1308,7 +1311,7 @@
         (unspec:SDF [(match_operand:SDF 1
 		         "register_operand" "<F_constraint>")]
          VRINT))]
-  "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
   "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1"
   [(set_attr "predicable" "<vrint_predicable>")
    (set_attr "predicable_short_it" "no")
@@ -1316,6 +1319,18 @@
    (set_attr "conds" "<vrint_conds>")]
 )
 
+;; Implements the lround, lfloor and lceil optabs.
+(define_insn "l<vrint_pattern><su_optab><mode>si2"
+  [(set (match_operand:SI 0 "register_operand" "=t")
+        (FIXUORS:SI (unspec:SDF
+                        [(match_operand:SDF 1
+                           "register_operand" "<F_constraint>")] VCVT)))]
+  "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
+  "vcvt<vrint_variant>%?.<su>32.<V_if_elem>\\t%0, %<V_reg>1"
+  [(set_attr "predicable" "no")
+   (set_attr "type" "f_cvtf2i")]
+)
+
 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
 ;; operand will be returned when both operands are zero (i.e. they may not
@@ -1327,7 +1342,7 @@
   [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
         (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
 		  (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
-  "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
   "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "type" "f_minmax<vfp_type>")
    (set_attr "conds" "unconditional")]
@@ -1337,12 +1352,28 @@
   [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
         (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
 		  (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
-  "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
   "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "type" "f_minmax<vfp_type>")
    (set_attr "conds" "unconditional")]
 )
 
+;; Write Floating-point Status and Control Register.
+(define_insn "set_fpscr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)]
+  "TARGET_VFP && TARGET_HARD_FLOAT"
+  "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status and Control Register.
+(define_insn "get_fpscr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]
+  "TARGET_VFP && TARGET_HARD_FLOAT"
+  "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"
+  [(set_attr "type" "mrs")])
+
+
 ;; Unimplemented insns:
 ;; fldm*
 ;; fstm*
--- a/src/gcc/config/arm/neon.md
+++ b/src/gcc/config/arm/neon.md
@@ -296,7 +296,7 @@
 		    UNSPEC_MISALIGNED_ACCESS))]
   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
   "vld1.<V_sz_elem>\t{%q0}, %A1"
-  [(set_attr "type" "neon_store1_1reg<q>")])
+  [(set_attr "type" "neon_load1_1reg<q>")])
 
 (define_insn "vec_set<mode>_internal"
   [(set (match_operand:VD 0 "s_register_operand" "=w,w")
@@ -629,6 +629,17 @@
   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
 )
 
+(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
+  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+	(FIXUORS:<V_cmp_result> (unspec:VCVTF
+			       [(match_operand:VCVTF 1 "register_operand" "w")]
+			       NEON_VCVT)))]
+  "TARGET_NEON && TARGET_FPU_ARMV8"
+  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
+   (set_attr "predicable" "no")]
+)
+
 (define_insn "ior<mode>3"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 	(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
@@ -1041,7 +1052,9 @@
       }
     else
       {
-	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
+	    && (!reg_overlap_mentioned_p (operands[0], operands[1])
+		|| REGNO (operands[0]) == REGNO (operands[1])))
 	  /* This clobbers CC.  */
 	  emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
 	else
@@ -1141,7 +1154,9 @@
       }
     else
       {
-	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+	if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
+	    && (!reg_overlap_mentioned_p (operands[0], operands[1])
+		|| REGNO (operands[0]) == REGNO (operands[1])))
 	  /* This clobbers CC.  */
 	  emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
 	else
@@ -1334,33 +1349,47 @@
 
 ;; Reduction operations
 
-(define_expand "reduc_splus_<mode>"
-  [(match_operand:VD 0 "s_register_operand" "")
+(define_expand "reduc_plus_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VD 1 "s_register_operand" "")]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 {
-  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+  rtx vec = gen_reg_rtx (<MODE>mode);
+  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpadd_internal<mode>);
+  /* The same result is actually computed into every element.  */
+  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
   DONE;
 })
 
-(define_expand "reduc_splus_<mode>"
-  [(match_operand:VQ 0 "s_register_operand" "")
+(define_expand "reduc_plus_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VQ 1 "s_register_operand" "")]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
    && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
-  rtx res_d = gen_reg_rtx (<V_HALF>mode);
 
   emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
-  emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
-  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
 
   DONE;
 })
 
-(define_insn "reduc_splus_v2di"
+(define_expand "reduc_plus_scal_v2di"
+  [(match_operand:DI 0 "nonimmediate_operand" "=w")
+   (match_operand:V2DI 1 "s_register_operand" "")]
+  "TARGET_NEON && !BYTES_BIG_ENDIAN"
+{
+  rtx vec = gen_reg_rtx (V2DImode);
+
+  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
+  emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
+
+  DONE;
+})
+
+(define_insn "arm_reduc_plus_internal_v2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
 	(unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
 		     UNSPEC_VPADD))]
@@ -1369,115 +1398,109 @@
   [(set_attr "type" "neon_add_q")]
 )
 
-;; NEON does not distinguish between signed and unsigned addition except on
-;; widening operations.
-(define_expand "reduc_uplus_<mode>"
-  [(match_operand:VDQI 0 "s_register_operand" "")
-   (match_operand:VDQI 1 "s_register_operand" "")]
-  "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
-{
-  emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
-  DONE;
-})
-
-(define_expand "reduc_smin_<mode>"
-  [(match_operand:VD 0 "s_register_operand" "")
+(define_expand "reduc_smin_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VD 1 "s_register_operand" "")]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 {
-  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+  rtx vec = gen_reg_rtx (<MODE>mode);
+
+  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpsmin<mode>);
+  /* The result is computed into every element of the vector.  */
+  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
   DONE;
 })
 
-(define_expand "reduc_smin_<mode>"
-  [(match_operand:VQ 0 "s_register_operand" "")
+(define_expand "reduc_smin_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VQ 1 "s_register_operand" "")]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
    && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
-  rtx res_d = gen_reg_rtx (<V_HALF>mode);
 
   emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
-  emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
-  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
 
   DONE;
 })
 
-(define_expand "reduc_smax_<mode>"
-  [(match_operand:VD 0 "s_register_operand" "")
+(define_expand "reduc_smax_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VD 1 "s_register_operand" "")]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 {
-  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+  rtx vec = gen_reg_rtx (<MODE>mode);
+  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpsmax<mode>);
+  /* The result is computed into every element of the vector.  */
+  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
   DONE;
 })
 
-(define_expand "reduc_smax_<mode>"
-  [(match_operand:VQ 0 "s_register_operand" "")
+(define_expand "reduc_smax_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VQ 1 "s_register_operand" "")]
   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
    && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
-  rtx res_d = gen_reg_rtx (<V_HALF>mode);
 
   emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
-  emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
-  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
 
   DONE;
 })
 
-(define_expand "reduc_umin_<mode>"
-  [(match_operand:VDI 0 "s_register_operand" "")
+(define_expand "reduc_umin_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VDI 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
-  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+  rtx vec = gen_reg_rtx (<MODE>mode);
+  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpumin<mode>);
+  /* The result is computed into every element of the vector.  */
+  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
   DONE;
 })
 
-(define_expand "reduc_umin_<mode>"
-  [(match_operand:VQI 0 "s_register_operand" "")
+(define_expand "reduc_umin_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VQI 1 "s_register_operand" "")]
   "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
-  rtx res_d = gen_reg_rtx (<V_HALF>mode);
 
   emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
-  emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
-  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
 
   DONE;
 })
 
-(define_expand "reduc_umax_<mode>"
-  [(match_operand:VDI 0 "s_register_operand" "")
+(define_expand "reduc_umax_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VDI 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
-  neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+  rtx vec = gen_reg_rtx (<MODE>mode);
+  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 			&gen_neon_vpumax<mode>);
+  /* The result is computed into every element of the vector.  */
+  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
   DONE;
 })
 
-(define_expand "reduc_umax_<mode>"
-  [(match_operand:VQI 0 "s_register_operand" "")
+(define_expand "reduc_umax_scal_<mode>"
+  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
    (match_operand:VQI 1 "s_register_operand" "")]
   "TARGET_NEON && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
-  rtx res_d = gen_reg_rtx (<V_HALF>mode);
 
   emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
-  emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
-  emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
 
   DONE;
 })
@@ -1842,9 +1865,9 @@
 ; good for plain vadd, vaddq.
 
 (define_expand "neon_vadd<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "=w")
-   (match_operand:VDQX 1 "s_register_operand" "w")
-   (match_operand:VDQX 2 "s_register_operand" "w")
+  [(match_operand:VCVTF 0 "s_register_operand" "=w")
+   (match_operand:VCVTF 1 "s_register_operand" "w")
+   (match_operand:VCVTF 2 "s_register_operand" "w")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_NEON"
 {
@@ -1869,9 +1892,9 @@
 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
 
 (define_insn "neon_vadd<mode>_unspec"
-  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
-        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
-		      (match_operand:VDQX 2 "s_register_operand" "w")]
+  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
+        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
+		      (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VADD))]
   "TARGET_NEON"
   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
@@ -2132,9 +2155,9 @@
 )
 
 (define_expand "neon_vsub<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "=w")
-   (match_operand:VDQX 1 "s_register_operand" "w")
-   (match_operand:VDQX 2 "s_register_operand" "w")
+  [(match_operand:VCVTF 0 "s_register_operand" "=w")
+   (match_operand:VCVTF 1 "s_register_operand" "w")
+   (match_operand:VCVTF 2 "s_register_operand" "w")
    (match_operand:SI 3 "immediate_operand" "i")]
   "TARGET_NEON"
 {
@@ -2149,9 +2172,9 @@
 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
 
 (define_insn "neon_vsub<mode>_unspec"
-  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
-        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
-		      (match_operand:VDQX 2 "s_register_operand" "w")]
+  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
+        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
+		      (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VSUB))]
   "TARGET_NEON"
   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
@@ -2547,6 +2570,14 @@
   [(set_attr "type" "neon_qabs<q>")]
 )
 
+(define_insn "neon_bswap<mode>"
+  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
+  "TARGET_NEON"
+  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
 (define_expand "neon_vneg<mode>"
   [(match_operand:VDQW 0 "s_register_operand" "")
    (match_operand:VDQW 1 "s_register_operand" "")
@@ -2557,6 +2588,33 @@
   DONE;
 })
 
+(define_expand "neon_copysignf<mode>"
+  [(match_operand:VCVTF 0 "register_operand")
+   (match_operand:VCVTF 1 "register_operand")
+   (match_operand:VCVTF 2 "register_operand")]
+  "TARGET_NEON"
+  "{
+     rtx v_bitmask_cast;
+     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
+     int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
+     rtvec v = rtvec_alloc (n_elt);
+
+     /* Create bitmask for vector select.  */
+     for (i = 0; i < n_elt; ++i)
+       RTVEC_ELT (v, i) = GEN_INT (0x80000000);
+
+     emit_move_insn (v_bitmask,
+		     gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
+     emit_move_insn (operands[0], operands[2]);
+     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
+					   <VCVTF:V_cmp_result>mode, 0);
+     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
+				     operands[1]));
+
+     DONE;
+  }"
+)
+
 (define_insn "neon_vqneg<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
 	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
@@ -4140,17 +4198,6 @@
   [(set_attr "type" "neon_permute<q>")]
 )
 
-(define_expand "neon_vtrn<mode>"
-  [(match_operand:SI 0 "s_register_operand" "r")
-   (match_operand:VDQW 1 "s_register_operand" "w")
-   (match_operand:VDQW 2 "s_register_operand" "w")]
-  "TARGET_NEON"
-{
-  neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
-			      operands[0], operands[1], operands[2]);
-  DONE;
-})
-
 (define_expand "neon_vzip<mode>_internal"
   [(parallel
     [(set (match_operand:VDQW 0 "s_register_operand" "")
@@ -4177,17 +4224,6 @@
   [(set_attr "type" "neon_zip<q>")]
 )
 
-(define_expand "neon_vzip<mode>"
-  [(match_operand:SI 0 "s_register_operand" "r")
-   (match_operand:VDQW 1 "s_register_operand" "w")
-   (match_operand:VDQW 2 "s_register_operand" "w")]
-  "TARGET_NEON"
-{
-  neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
-			      operands[0], operands[1], operands[2]);
-  DONE;
-})
-
 (define_expand "neon_vuzp<mode>_internal"
   [(parallel
     [(set (match_operand:VDQW 0 "s_register_operand" "")
@@ -4214,17 +4250,6 @@
   [(set_attr "type" "neon_zip<q>")]
 )
 
-(define_expand "neon_vuzp<mode>"
-  [(match_operand:SI 0 "s_register_operand" "r")
-   (match_operand:VDQW 1 "s_register_operand" "w")
-   (match_operand:VDQW 2 "s_register_operand" "w")]
-  "TARGET_NEON"
-{
-  neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
-			      operands[0], operands[1], operands[2]);
-  DONE;
-})
-
 (define_expand "neon_vreinterpretv8qi<mode>"
   [(match_operand:V8QI 0 "s_register_operand" "")
    (match_operand:VDX 1 "s_register_operand" "")]
@@ -5357,61 +5382,6 @@
   [(set_attr "type" "neon_store4_4reg<q>")]
 )
 
-(define_expand "neon_vand<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "")
-   (match_operand:VDQX 1 "s_register_operand" "")
-   (match_operand:VDQX 2 "neon_inv_logic_op2" "")
-   (match_operand:SI 3 "immediate_operand" "")]
-  "TARGET_NEON"
-{
-  emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "neon_vorr<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "")
-   (match_operand:VDQX 1 "s_register_operand" "")
-   (match_operand:VDQX 2 "neon_logic_op2" "")
-   (match_operand:SI 3 "immediate_operand" "")]
-  "TARGET_NEON"
-{
-  emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "neon_veor<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "")
-   (match_operand:VDQX 1 "s_register_operand" "")
-   (match_operand:VDQX 2 "s_register_operand" "")
-   (match_operand:SI 3 "immediate_operand" "")]
-  "TARGET_NEON"
-{
-  emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "neon_vbic<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "")
-   (match_operand:VDQX 1 "s_register_operand" "")
-   (match_operand:VDQX 2 "neon_logic_op2" "")
-   (match_operand:SI 3 "immediate_operand" "")]
-  "TARGET_NEON"
-{
-  emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "neon_vorn<mode>"
-  [(match_operand:VDQX 0 "s_register_operand" "")
-   (match_operand:VDQX 1 "s_register_operand" "")
-   (match_operand:VDQX 2 "neon_inv_logic_op2" "")
-   (match_operand:SI 3 "immediate_operand" "")]
-  "TARGET_NEON"
-{
-  emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
 (define_insn "neon_vec_unpack<US>_lo_<mode>"
   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
         (SE:<V_unpack> (vec_select:<V_HALF>
--- a/src/gcc/config/arm/types.md
+++ b/src/gcc/config/arm/types.md
@@ -66,7 +66,6 @@
 ; f_mrc              transfer vfp to arm reg.
 ; f_mrrc             transfer vfp to two arm regs.
 ; f_rint[d,s]        double/single floating point rount to integral.
-; f_sel[d,s]         double/single floating byte select.
 ; f_store[d,s]       double/single store to memory.  Used for VFP unit.
 ; fadd[d,s]          double/single floating-point scalar addition.
 ; fcmp[d,s]          double/single floating-point compare.
@@ -571,8 +570,6 @@
   f_mrrc,\
   f_rintd,\
   f_rints,\
-  f_seld,\
-  f_sels,\
   f_stored,\
   f_stores,\
   faddd,\
--- a/src/gcc/config/arm/arm_neon_builtins.def
+++ b/src/gcc/config/arm/arm_neon_builtins.def
@@ -18,8 +18,7 @@
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
-VAR10 (BINOP, vadd,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+VAR2 (BINOP, vadd, v2sf, v4sf),
 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
@@ -54,7 +53,7 @@
 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
-VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+VAR2 (BINOP, vsub, v2sf, v4sf),
 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
@@ -89,6 +88,7 @@
 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di),
 VAR2 (UNOP, vcnt, v8qi, v16qi),
 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
@@ -135,6 +135,7 @@
 VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
 VAR10 (SELECT, vbsl,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+VAR2 (COPYSIGNF, copysignf, v2sf, v4sf),
 VAR2 (RINT, vrintn, v2sf, v4sf),
 VAR2 (RINT, vrinta, v2sf, v4sf),
 VAR2 (RINT, vrintp, v2sf, v4sf),
@@ -141,6 +142,18 @@
 VAR2 (RINT, vrintm, v2sf, v4sf),
 VAR2 (RINT, vrintz, v2sf, v4sf),
 VAR2 (RINT, vrintx, v2sf, v4sf),
+VAR1 (RINT, vcvtav2sf, v2si),
+VAR1 (RINT, vcvtav4sf, v4si),
+VAR1 (RINT, vcvtauv2sf, v2si),
+VAR1 (RINT, vcvtauv4sf, v4si),
+VAR1 (RINT, vcvtpv2sf, v2si),
+VAR1 (RINT, vcvtpv4sf, v4si),
+VAR1 (RINT, vcvtpuv2sf, v2si),
+VAR1 (RINT, vcvtpuv4sf, v4si),
+VAR1 (RINT, vcvtmv2sf, v2si),
+VAR1 (RINT, vcvtmv4sf, v4si),
+VAR1 (RINT, vcvtmuv2sf, v2si),
+VAR1 (RINT, vcvtmuv4sf, v4si),
 VAR1 (VTBL, vtbl1, v8qi),
 VAR1 (VTBL, vtbl2, v8qi),
 VAR1 (VTBL, vtbl3, v8qi),
@@ -149,9 +162,6 @@
 VAR1 (VTBX, vtbx2, v8qi),
 VAR1 (VTBX, vtbx3, v8qi),
 VAR1 (VTBX, vtbx4, v8qi),
-VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
-VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
-VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
@@ -199,14 +209,4 @@
 VAR9 (STORESTRUCT, vst4,
 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
 VAR7 (STORESTRUCTLANE, vst4_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
-VAR10 (LOGICBINOP, vand,
-	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-VAR10 (LOGICBINOP, vorr,
-	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-VAR10 (BINOP, veor,
-	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-VAR10 (LOGICBINOP, vbic,
-	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-VAR10 (LOGICBINOP, vorn,
-	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
--- a/src/gcc/config/arm/neon.ml
+++ b/src/gcc/config/arm/neon.ml
@@ -294,6 +294,8 @@
     (* Mark that the intrinsic requires a particular bit in __ARM_FP to
     be set.   *)
   | Requires_FP_bit of int
+    (* Compiler optimization level for the test.  *)
+  | Compiler_optim of string
 
 exception MixedMode of elts * elts
 
@@ -1941,18 +1943,18 @@
     Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
 
     (* Bic (And-not).  *)
-    Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
-    Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
-    Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
+    Vbic, [Compiler_optim "-O2"], All (3, Dreg), "vbic", notype_2, su_8_32;
+    Vbic, [No_op; Compiler_optim "-O2"], All (3, Dreg), "vbic", notype_2, [S64; U64];
+    Vbic, [Compiler_optim "-O2"], All (3, Qreg), "vbicQ", notype_2, su_8_64;
 
     (* Or-not.  *)
-    Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
-    Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
-    Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
+    Vorn, [Compiler_optim "-O2"], All (3, Dreg), "vorn", notype_2, su_8_32;
+    Vorn, [No_op; Compiler_optim "-O2"], All (3, Dreg), "vorn", notype_2, [S64; U64];
+    Vorn, [Compiler_optim "-O2"], All (3, Qreg), "vornQ", notype_2, su_8_64;
   ]
 
 let type_in_crypto_only t
-  = (t == P64) or (t == P128)
+  = (t == P64) || (t == P128)
 
 let cross_product s1 s2
   = List.filter (fun (e, e') -> e <> e')
@@ -1963,7 +1965,7 @@
   let casts = cross_product elems elems in
   List.map
     (fun (convto, convfrom) ->
-       Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom)
+       Vreinterp, (if (type_in_crypto_only convto) || (type_in_crypto_only convfrom)
                    then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Dreg; Dreg |],
                    "vreinterpret", conv_1, [Cast (convto, convfrom)])
     casts
@@ -1973,7 +1975,7 @@
   let casts = cross_product elems elems in
   List.map
     (fun (convto, convfrom) ->
-       Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom)
+       Vreinterp, (if (type_in_crypto_only convto) || (type_in_crypto_only convfrom)
                    then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Qreg; Qreg |],
                    "vreinterpretQ", conv_1, [Cast (convto, convfrom)])
     casts
--- a/src/gcc/config/arm/cortex-a7.md
+++ b/src/gcc/config/arm/cortex-a7.md
@@ -137,7 +137,7 @@
   (and (eq_attr "tune" "cortexa7")
        (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\
                         adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                        bfm,rev,\
+                        bfm,clz,rbit,rev,\
                         shift_imm,shift_reg,mov_reg,mvn_reg"))
   "cortex_a7_ex1")
 
--- a/src/gcc/config/arm/aarch-common-protos.h
+++ b/src/gcc/config/arm/aarch-common-protos.h
@@ -24,6 +24,9 @@
 #define GCC_AARCH_COMMON_PROTOS_H
 
 extern int aarch_crypto_can_dual_issue (rtx, rtx);
+extern bool aarch_rev16_p (rtx);
+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
 extern int arm_early_load_addr_dep (rtx, rtx);
 extern int arm_early_store_addr_dep (rtx, rtx);
 extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
@@ -54,6 +57,7 @@
   const int bfi;		/* Bit-field insert.  */
   const int bfx;		/* Bit-field extraction.  */
   const int clz;		/* Count Leading Zeros.  */
+  const int rev;		/* Reverse bits/bytes.  */
   const int non_exec;		/* Extra cost when not executing insn.  */
   const bool non_exec_costs_exec; /* True if non-execution must add the exec
 				     cost.  */
--- a/src/gcc/config/arm/t-arm
+++ b/src/gcc/config/arm/t-arm
@@ -40,6 +40,7 @@
 		$(srcdir)/config/arm/cortex-a9.md \
 		$(srcdir)/config/arm/cortex-a9-neon.md \
 		$(srcdir)/config/arm/cortex-a53.md \
+		$(srcdir)/config/arm/xgene1.md \
 		$(srcdir)/config/arm/cortex-m4-fpu.md \
 		$(srcdir)/config/arm/cortex-m4.md \
 		$(srcdir)/config/arm/cortex-r4f.md \
@@ -90,7 +91,8 @@
   $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
   $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \
   $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
-  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \
+  intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) sched-int.h \
+  $(srcdir)/config/arm/arm-cores.def \
   $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \
   $(srcdir)/config/arm/arm_neon_builtins.def
 
--- a/src/gcc/config/arm/predicates.md
+++ b/src/gcc/config/arm/predicates.md
@@ -291,6 +291,15 @@
 			      || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
        (match_test "mode == GET_MODE (op)")))
 
+(define_special_predicate "shift_nomul_operator"
+  (and (ior (and (match_code "rotate")
+		 (match_test "CONST_INT_P (XEXP (op, 1))
+			      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))
+	    (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
+		 (match_test "!CONST_INT_P (XEXP (op, 1))
+			      || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
+       (match_test "mode == GET_MODE (op)")))
+
 ;; True for shift operators which can be used with saturation instructions.
 (define_special_predicate "sat_shift_operator"
   (and (ior (and (match_code "mult")
@@ -681,5 +690,6 @@
        (match_code "reg" "0")))
 
 (define_predicate "call_insn_operand"
-  (ior (match_code "symbol_ref")
+  (ior (and (match_code "symbol_ref")
+	    (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))"))
        (match_operand 0 "s_register_operand")))
--- a/src/gcc/config/arm/cortex-a15-neon.md
+++ b/src/gcc/config/arm/cortex-a15-neon.md
@@ -655,11 +655,21 @@
        (eq_attr "type" "fmov"))
   "ca15_issue1,ca15_cx_perm")
 
-(define_insn_reservation "cortex_a15_vfp_to_from_gp" 5
+(define_insn_reservation "cortex_a15_gp_to_vfp" 5
   (and (eq_attr "tune" "cortexa15")
-       (eq_attr "type" "f_mcr, f_mcrr, f_mrc, f_mrrc"))
-  "ca15_issue1,ca15_ls1+ca15_ls2")
+       (eq_attr "type" "f_mcr, f_mcrr"))
+  "ca15_issue1,ca15_ls")
 
+(define_insn_reservation "cortex_a15_mov_vfp_to_gp" 5
+  (and (eq_attr "tune" "cortexa15")
+       (eq_attr "type" "f_mrc, f_mrrc"))
+  "ca15_issue1,ca15_ls")
+
+;; Moves from floating point registers to general purpose registers
+;; induce additional latency.
+(define_bypass 10 "cortex_a15_vfp*, cortex_a15_neon*, cortex_a15_gp_to_vfp" "cortex_a15_mov_vfp_to_gp")
+
+
 (define_insn_reservation "cortex_a15_vfp_ariths" 7
   (and (eq_attr "tune" "cortexa15")
        (eq_attr "type" "ffariths"))
--- a/src/gcc/config/arm/arm_neon.h
+++ b/src/gcc/config/arm/arm_neon.h
@@ -452,114 +452,121 @@
 } poly64x2x4_t;
 #endif
 
-
-
+/* vadd  */
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vadd_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vadd_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vadd_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vadd_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3);
+#ifdef __FAST_MATH
+  return __a + __b;
+#else
+  return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b, 3);
+#endif
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vadd_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vaddq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vaddq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vaddq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vaddq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1);
+  return __a + __b;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vaddq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3);
+#ifdef __FAST_MATH
+  return __a + __b;
+#else
+  return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b, 3);
+#endif
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a + __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
@@ -949,93 +956,102 @@
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vmul_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1);
+  return __a * __b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vmul_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1);
+  return __a * __b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vmul_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1);
+  return __a * __b;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmul_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3);
+#ifdef __FAST_MATH
+  return __a * __b;
+#else
+  return (float32x2_t) __builtin_neon_vmulv2sf (__a, __b, 3);
+#endif
+
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a * __b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a * __b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a * __b;
 }
 
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vmul_p8 (poly8x8_t __a, poly8x8_t __b)
-{
-  return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
-}
-
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vmulq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1);
+  return __a * __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vmulq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1);
+  return __a * __b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vmulq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1);
+  return __a * __b;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmulq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3);
+#ifdef __FAST_MATH
+  return __a * __b;
+#else
+  return (float32x4_t) __builtin_neon_vmulv4sf (__a, __b, 3);
+#endif
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a * __b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a * __b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a * __b;
 }
 
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vmul_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+  return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
+}
+
 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
@@ -1520,112 +1536,121 @@
 }
 
 #endif
+
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vsub_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vsub_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vsub_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vsub_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3);
+#ifdef __FAST_MATH
+  return __a - __b;
+#else
+  return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b, 3);
+#endif
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vsub_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vsubq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vsubq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vsubq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vsubq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1);
+  return __a - __b;
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vsubq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3);
+#ifdef __FAST_MATH
+  return __a - __b;
+#else
+  return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b, 3);
+#endif
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a - __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
@@ -11295,484 +11320,483 @@
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vand_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vand_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vand_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vand_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vand_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vand_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vand_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vand_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vandq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vandq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vandq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vandq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1);
+  return __a & __b;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a & __b;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vorr_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vorr_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vorr_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vorr_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vorrq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vorrq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vorrq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vorrq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1);
+  return __a | __b;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a | __b;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 veor_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 veor_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 veor_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 veor_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 veor_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 veor_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 veor_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_veordi (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 veor_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 veorq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 veorq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 veorq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 veorq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a ^ __b;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vbic_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vbic_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vbic_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vbic_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vbicq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vbicq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vbicq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vbicq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a & ~__b;
 }
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vorn_s8 (int8x8_t __a, int8x8_t __b)
 {
-  return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vorn_s16 (int16x4_t __a, int16x4_t __b)
 {
-  return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vorn_s32 (int32x2_t __a, int32x2_t __b)
 {
-  return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vorn_s64 (int64x1_t __a, int64x1_t __b)
 {
-  return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vornq_s8 (int8x16_t __a, int8x16_t __b)
 {
-  return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vornq_s16 (int16x8_t __a, int16x8_t __b)
 {
-  return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vornq_s32 (int32x4_t __a, int32x4_t __b)
 {
-  return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
 vornq_s64 (int64x2_t __a, int64x2_t __b)
 {
-  return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return __a | ~__b;
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+  return __a | ~__b;
 }
 
-
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vreinterpret_p8_p16 (poly16x4_t __a)
 {
--- a/src/gcc/config/arm/cortex-m7.md
+++ b/src/gcc/config/arm/cortex-m7.md
@@ -0,0 +1,181 @@
+;; ARM Cortex-M7 pipeline description
+;; Copyright (C) 2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_m7")
+
+;; We model the dual-issue constraints of this core with
+;; following units.
+
+(define_cpu_unit "cm7_i0, cm7_i1" "cortex_m7")
+(define_cpu_unit "cm7_a0, cm7_a1" "cortex_m7")
+(define_cpu_unit "cm7_branch,cm7_wb,cm7_ext,cm7_shf" "cortex_m7")
+(define_cpu_unit "cm7_lsu" "cortex_m7")
+(define_cpu_unit "cm7_mac" "cortex_m7")
+(define_cpu_unit "cm7_fpu" "cortex_m7")
+
+(define_reservation "cm7_all_units"
+                    "cm7_i0+cm7_i1+cm7_a0+cm7_a1+cm7_branch\
+                     +cm7_wb+cm7_ext+cm7_shf+cm7_lsu+cm7_mac\
+                     +cm7_fpu")
+
+;; Simple alu instruction without inline shift operation.
+(define_insn_reservation "cortex_m7_alu_simple" 2
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
+                        alu_reg,alus_reg,logic_reg,logics_reg,\
+                        adc_imm,adcs_imm,adc_reg,adcs_reg,\
+                        adr,bfm,rev,\
+                        shift_imm,shift_reg,\
+                        mov_imm,mov_reg,mvn_imm,mvn_reg,\
+                        mov_shift_reg,mov_shift,\
+                        mvn_shift,mvn_shift_reg,\
+                        logic_shift_imm,logics_shift_imm,\
+                        alu_shift_reg,alus_shift_reg,\
+                        logic_shift_reg,logics_shift_reg,\
+                        mrs,clz,f_mcr,f_mrc,multiple,no_insn"))
+  "cm7_i0|cm7_i1,cm7_a0|cm7_a1")
+
+;; Simple alu with inline shift operation.
+(define_insn_reservation "cortex_m7_alu_shift" 2
+   (and (eq_attr "tune" "cortexm7")
+	(eq_attr "type" "alu_shift_imm,alus_shift_imm"))
+   "cm7_i0|cm7_i1,(cm7_a0|cm7_a1)+cm7_shf+cm7_branch")
+
+;; Only one ALU can be used for DSP instructions.
+(define_insn_reservation "cortex_m7_dsp" 2
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "alu_reg,smlaxy,smlalxy,smulxy"))
+  "cm7_i0|cm7_i1,cm7_a0")
+
+;; The multiply instructions.
+(define_insn_reservation "cortex_m7_multiply" 2
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "mul,muls,umull,smull"))
+   "cm7_i0|cm7_i1,(cm7_a0|cm7_a1)+cm7_wb")
+
+(define_insn_reservation "cortex_m7_idiv" 4
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "sdiv,udiv"))
+   "cm7_all_units*4")
+
+(define_insn_reservation "cortex_m7_alu_extend" 2
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "extend"))
+   "cm7_i0|cm7_i1,(cm7_a0|cm7_a1)+cm7_ext+cm7_branch")
+
+(define_insn_reservation "cortex_m7_mac" 2
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "mla,mlas"))
+   "cm7_i0|cm7_i1,cm7_mac+cm7_wb")
+
+;; The branch instructions.
+(define_insn_reservation "cortex_m7_branch" 0
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "branch,call"))
+   "cm7_i0|cm7_i1,cm7_branch")
+
+;; The load instructions.
+(define_insn_reservation "cortex_m7_load1" 2
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "load_byte, load1"))
+   "cm7_i0|cm7_i1,cm7_lsu")
+
+(define_insn_reservation "cortex_m7_load2" 2
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "load2"))
+   "cm7_all_units")
+
+(define_insn_reservation "cortex_m7_loadm" 2
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "load3,load4"))
+   "cm7_all_units*2")
+
+;; The store instructions.
+(define_insn_reservation "cortex_m7_store1" 0
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "store1"))
+   "cm7_i0|cm7_i1,cm7_lsu+cm7_wb")
+
+(define_insn_reservation "cortex_m7_store2" 0
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "store2"))
+   "cm7_all_units")
+
+(define_insn_reservation "cortex_m7_storem" 0
+   (and (eq_attr "tune" "cortexm7")
+        (eq_attr "type" "store3,store4"))
+   "cm7_all_units*2")
+
+;; The FPU instructions.
+(define_insn_reservation "cortex_m7_fpalu" 3
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "ffariths,ffarithd,fadds,faddd,fmov,fconsts,\
+                        fconstd,fcmpd,f_cvt,f_cvtf2i,f_cvti2f, fcmps,\
+                        fmuls,f_flag"))
+  "cm7_i0|cm7_i1,cm7_fpu")
+
+(define_insn_reservation "cortex_m7_fmacs" 6
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "fmacs,ffmas"))
+  "cm7_i0|cm7_i1,cm7_fpu")
+
+(define_insn_reservation "cortex_m7_fdivs" 16
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "fdivs, fsqrts"))
+  "cm7_i0|cm7_i1, cm7_fpu*5")
+
+(define_insn_reservation "cortex_m7_f_loads" 2
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "f_loads"))
+  "cm7_i0|cm7_i1, cm7_lsu")
+
+(define_insn_reservation "cortex_m7_f_stores" 0
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "f_stores"))
+  "cm7_i0|cm7_i1, cm7_lsu+cm7_wb")
+
+(define_insn_reservation "cortex_m7_fmuld" 6
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "fmuld"))
+  "cm7_i0|cm7_i1,cm7_fpu*3")
+
+(define_insn_reservation "cortex_m7_fmacd" 10
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "fmacd,ffmad"))
+  "cm7_i0|cm7_i1,cm7_fpu*4")
+
+(define_insn_reservation "cortex_m7_fdivd" 31
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "fdivd,fsqrtd"))
+  "cm7_i0|cm7_i1,cm7_fpu*4")
+
+(define_insn_reservation "cortex_m7_f_loadd" 3
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "f_loadd"))
+  "cm7_all_units")
+
+(define_insn_reservation "cortex_m7_f_stored" 0
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "f_stored"))
+  "cm7_all_units")
+
+(define_insn_reservation "cortex_m7_f_mcr" 1
+  (and (eq_attr "tune" "cortexm7")
+       (eq_attr "type" "f_mcrr,f_mrrc"))
+  "cm7_all_units")
--- a/src/gcc/config/arm/aarch-common.c
+++ b/src/gcc/config/arm/aarch-common.c
@@ -191,6 +191,83 @@
   return 0;
 }
 
+bool
+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val)
+            == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
+                                   mode);
+}
+
+bool
+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
+{
+  return CONST_INT_P (val)
+         && INTVAL (val)
+            == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
+                                   mode);
+}
+
+
+static bool
+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
+{
+  if (GET_CODE (lhs) == AND
+         && GET_CODE (XEXP (lhs, 0)) == ASHIFT
+            && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
+            && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
+            && REG_P (XEXP (XEXP (lhs, 0), 0))
+         && CONST_INT_P (XEXP (lhs, 1))
+      && GET_CODE (rhs) == AND
+         && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
+            && REG_P (XEXP (XEXP (rhs, 0), 0))
+            && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
+            && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
+         && CONST_INT_P (XEXP (rhs, 1))
+      && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
+
+    {
+      rtx lhs_mask = XEXP (lhs, 1);
+      rtx rhs_mask = XEXP (rhs, 1);
+
+      return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
+             && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
+    }
+
+  return false;
+}
+
+/* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
+   These will be of the form:
+     ((x >> 8) & 0x00ff00ff)
+   | ((x << 8) & 0xff00ff00)
+   for SImode and with similar but wider bitmasks for DImode.
+   The two sub-expressions of the IOR can appear on either side so check both
+   permutations with the help of aarch_rev16_p_1 above.  */
+
+bool
+aarch_rev16_p (rtx x)
+{
+  rtx left_sub_rtx, right_sub_rtx;
+  bool is_rev = false;
+
+  if (GET_CODE (x) != IOR)
+    return false;
+
+  left_sub_rtx = XEXP (x, 0);
+  right_sub_rtx = XEXP (x, 1);
+
+  /* There are no canonicalisation rules for the position of the two shifts
+     involved in a rev, so try both permutations.  */
+  is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
+
+  if (!is_rev)
+    is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
+
+  return is_rev;
+}
+
 /* Return nonzero if the CONSUMER instruction (a load) does need
    PRODUCER's value to calculate the address.  */
 int
--- a/src/gcc/config/arm/cortex-a17-neon.md
+++ b/src/gcc/config/arm/cortex-a17-neon.md
@@ -0,0 +1,605 @@
+;; ARM Cortex-A17 NEON pipeline description
+;; Copyright (C) 2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_attr "cortex_a17_neon_type"
+  "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q,
+   neon_arith_basic, neon_arith_complex,
+   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,\
+   neon_shift_imm_complex,
+   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+   neon_fp_arith_q, neon_fp_cvt_int,
+   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+   neon_fp_recpe_rsqrte_q, neon_bitops, neon_bitops_q, neon_from_gp,
+   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+   neon_load_f, neon_load_g, neon_load_h, neon_store_a, neon_store_b,
+   unknown"
+  (cond [
+          (eq_attr "type" "neon_abd, neon_abd_long")
+            (const_string "neon_abd")
+          (eq_attr "type" "neon_abd_q")
+            (const_string "neon_abd_q")
+          (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+                           neon_reduc_add_acc_q")
+            (const_string "neon_arith_acc")
+          (eq_attr "type" "neon_arith_acc_q")
+            (const_string "neon_arith_acc_q")
+          (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
+                           neon_add_widen, neon_neg, neon_neg_q,\
+                           neon_reduc_add, neon_reduc_add_q,\
+                           neon_reduc_add_long, neon_sub, neon_sub_q,\
+                           neon_sub_long, neon_sub_widen, neon_logic,\
+                           neon_logic_q, neon_tst, neon_tst_q")
+            (const_string "neon_arith_basic")
+          (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\
+                           neon_add_halve, neon_add_halve_q,\
+                           neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+                           neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+                           neon_qneg_q, neon_qsub, neon_qsub_q,\
+                           neon_sub_halve_narrow_q,\
+                           neon_compare, neon_compare_q,\
+                           neon_compare_zero, neon_compare_zero_q,\
+                           neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+                           neon_reduc_minmax_q")
+            (const_string "neon_arith_complex")
+
+          (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+                           neon_mul_h_scalar, neon_mul_s_scalar,\
+                           neon_sat_mul_b, neon_sat_mul_h,\
+                           neon_sat_mul_s, neon_sat_mul_h_scalar,\
+                           neon_sat_mul_s_scalar,\
+                           neon_mul_b_long, neon_mul_h_long,\
+                           neon_mul_s_long,\
+                           neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+                           neon_sat_mul_b_long, neon_sat_mul_h_long,\
+                           neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+                           neon_sat_mul_s_scalar_long")
+            (const_string "neon_multiply")
+          (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+                           neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+                           neon_sat_mul_b_q, neon_sat_mul_h_q,\
+                           neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+                           neon_sat_mul_s_scalar_q")
+            (const_string "neon_multiply_q")
+          (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
+                           neon_mla_h_scalar, neon_mla_s_scalar,\
+                           neon_mla_b_long, neon_mla_h_long,\
+                           neon_mla_s_long,\
+                           neon_mla_h_scalar_long, neon_mla_s_scalar_long")
+            (const_string "neon_mla")
+          (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+                           neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+            (const_string "neon_mla_q")
+          (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
+                           neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+                           neon_sat_mla_s_scalar_long")
+            (const_string "neon_sat_mla_long")
+
+          (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+            (const_string "neon_shift_acc")
+          (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+                           neon_shift_imm_narrow_q, neon_shift_imm_long")
+            (const_string "neon_shift_imm_basic")
+          (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+                           neon_sat_shift_imm_narrow_q")
+            (const_string "neon_shift_imm_complex")
+          (eq_attr "type" "neon_shift_reg")
+            (const_string "neon_shift_reg_basic")
+          (eq_attr "type" "neon_shift_reg_q")
+            (const_string "neon_shift_reg_basic_q")
+          (eq_attr "type" "neon_sat_shift_reg")
+            (const_string "neon_shift_reg_complex")
+          (eq_attr "type" "neon_sat_shift_reg_q")
+            (const_string "neon_shift_reg_complex_q")
+
+          (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+                           neon_fp_abs_s, neon_fp_abs_s_q")
+            (const_string "neon_fp_negabs")
+          (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+                           neon_fp_reduc_add_s, neon_fp_compare_s,\
+                           neon_fp_minmax_s, neon_fp_minmax_s_q,\
+                           neon_fp_reduc_minmax_s, neon_fp_round_s,\
+                           neon_fp_round_s_q, neon_fp_round_d,\
+	                   neon_fp_round_d_q, neon_fp_reduc_minmax_s_q")
+            (const_string "neon_fp_arith")
+          (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+                           neon_fp_reduc_add_s_q, neon_fp_compare_s_q")
+            (const_string "neon_fp_arith_q")
+          (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s")
+            (const_string "neon_fp_cvt_int")
+          (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q")
+            (const_string "neon_fp_cvt_int_q")
+          (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+            (const_string "neon_fp_cvt16")
+          (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar")
+            (const_string "neon_fp_mul")
+          (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q")
+            (const_string "neon_fp_mul_q")
+          (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar")
+            (const_string "neon_fp_mla")
+          (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q")
+            (const_string "neon_fp_mla_q")
+          (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s")
+            (const_string "neon_fp_recpe_rsqrte")
+          (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q")
+            (const_string "neon_fp_recpe_rsqrte_q")
+
+          (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+                           neon_rev, neon_permute,\
+                           neon_tbl1, neon_tbl2, neon_zip,\
+                           neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+                           neon_move, neon_move_q, neon_move_narrow_q")
+            (const_string "neon_bitops")
+          (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+                           neon_rev_q, neon_permute_q")
+            (const_string "neon_bitops_q")
+          (eq_attr "type" "neon_from_gp")
+            (const_string "neon_from_gp")
+          (eq_attr "type" "neon_from_gp_q")
+            (const_string "neon_from_gp_q")
+          (eq_attr "type" "neon_tbl3, neon_tbl4")
+            (const_string "neon_tbl3_tbl4")
+          (eq_attr "type" "neon_zip_q")
+            (const_string "neon_zip_q")
+          (eq_attr "type" "neon_to_gp, neon_to_gp_q")
+            (const_string "neon_to_gp")
+
+          (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\
+                           neon_load1_one_lane, neon_load1_one_lane_q")
+            (const_string "neon_load_a")
+
+          (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
+            (const_string "neon_load_b")
+
+          (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+                           neon_load1_all_lanes,neon_load1_all_lanes_q,\
+                           neon_load2_one_lane, neon_load2_one_lane_q,\
+                           neon_load2_all_lanes, neon_load2_all_lanes_q")
+            (const_string "neon_load_c")
+
+          (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q,\
+                          neon_load2_2reg, neon_load2_2reg_q")
+            (const_string "neon_load_d")
+
+          (eq_attr "type" "neon_load3_one_lane,\
+                           neon_load3_all_lanes,\
+                           neon_load4_one_lane, neon_load4_all_lanes")
+            (const_string "neon_load_e")
+
+
+          (eq_attr "type" "neon_load3_one_lane_q,\
+                           neon_load3_all_lanes_q,\
+                           neon_load4_one_lane_q, neon_load4_all_lanes_q")
+            (const_string "neon_load_f")
+
+          (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")
+            (const_string "neon_load_g")
+
+          (eq_attr "type" "neon_load2_4reg,neon_load2_4reg_q,\
+                           neon_load4_4reg,neon_load4_4reg_q")
+            (const_string "neon_load_h")
+
+          (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\
+                           neon_store1_2reg, neon_store1_2reg_q,\
+                           neon_store1_3reg, neon_store1_3reg_q,\
+                           neon_store1_4reg, neon_store1_4reg_q,\
+                           neon_store1_one_lane, neon_store1_one_lane_q,\
+                           neon_store2_2reg, neon_store2_2reg_q,\
+                           neon_store3_one_lane, neon_store3_one_lane_q,\
+                           neon_store4_one_lane, neon_store4_one_lane_q")
+            (const_string "neon_store_a")
+
+          (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\
+                           neon_store2_one_lane, neon_store2_one_lane_q,\
+                           neon_store3_3reg, neon_store3_3reg_q,\
+                           neon_store4_4reg, neon_store4_4reg_q")
+            (const_string "neon_store_b")
+]
+          (const_string "unknown")))
+
+(define_automaton "cortex_a17_neon")
+
+(define_cpu_unit "ca17_asimd0, ca17_asimd1" "cortex_a17_neon")
+(define_cpu_unit "ca17_fdiv0,ca17_simdfpadd0, ca17_simdfpmul0" "cortex_a17_neon")
+(define_cpu_unit "ca17_simdimac0, ca17_simdialu0, ca17_perm0" "cortex_a17_neon")
+
+(define_cpu_unit "ca17_simdialu1, ca17_perm1, ca17_simdshift1" "cortex_a17_neon")
+(define_cpu_unit "ca17_iacc1" "cortex_a17_neon")
+(define_cpu_unit "ca17_fpmul1, ca17_fpadd1" "cortex_a17_neon")
+
+
+;; Integer Arithmetic Instructions.
+
+(define_insn_reservation  "cortex_a17_neon_abd" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_abd"))
+  "(ca17_asimd0+ca17_simdialu0) | (ca17_asimd1+ca17_simdialu1)")
+
+(define_insn_reservation  "cortex_a17_neon_abd_q" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_abd_q"))
+  "ca17_asimd0+ca17_asimd1+ca17_simdialu0+ca17_simdialu1")
+
+(define_insn_reservation  "cortex_a17_neon_aba" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_arith_acc"))
+  "ca17_asimd1+ca17_simdialu1, ca17_iacc1")
+
+(define_insn_reservation  "cortex_a17_neon_aba_q" 8
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_arith_acc_q"))
+  "ca17_asimd0+ca17_asimd1+ca17_simdialu0+ca17_simdialu1, ca17_iacc1*2")
+
+(define_insn_reservation  "cortex_a17_neon_arith_basic" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_arith_basic"))
+  "(ca17_asimd0+ca17_simdialu0) | (ca17_asimd1+ca17_simdialu1)")
+
+(define_insn_reservation  "cortex_a17_neon_arith_complex" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_arith_complex"))
+  "(ca17_asimd0+ca17_simdialu0) | (ca17_asimd1+ca17_simdialu1)")
+
+;; Integer Multiply Instructions.
+
+(define_insn_reservation "cortex_a17_neon_multiply" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_multiply"))
+  "ca17_asimd0+ca17_simdimac0")
+
+(define_insn_reservation "cortex_a17_neon_multiply_q" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_multiply_q"))
+  "(ca17_asimd0+ca17_simdimac0)*2")
+
+(define_insn_reservation "cortex_a17_neon_mla" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_mla"))
+  "ca17_asimd0+ca17_simdimac0*2")
+
+(define_insn_reservation "cortex_a17_neon_mla_q" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_mla_q"))
+  "(ca17_asimd0+ca17_simdimac0)*2,ca17_simdimac0")
+
+(define_insn_reservation "cortex_a17_neon_sat_mla_long" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_sat_mla_long"))
+  "ca17_asimd0+ca17_simdimac0*2")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_acc" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_acc"))
+  "ca17_asimd1+ca17_simdshift1,ca17_iacc1")
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_imm_basic" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_imm_basic"))
+  "ca17_asimd1+ca17_simdshift1")
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_imm_complex" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_imm_complex"))
+  "ca17_asimd1+ca17_simdshift1")
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_reg_basic" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_reg_basic"))
+  "ca17_asimd1+ca17_simdshift1")
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_reg_basic_q" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_reg_basic_q"))
+  "(ca17_asimd1+ca17_simdshift1)*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_reg_complex" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_reg_complex"))
+  "ca17_asimd1+ca17_simdshift1")
+
+(define_insn_reservation
+  "cortex_a17_neon_shift_reg_complex_q" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_shift_reg_complex_q"))
+  "(ca17_asimd1+ca17_simdshift1)*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_negabs" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_negabs"))
+  "ca17_asimd0+ca17_simdfpadd0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_arith" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_arith"))
+  "ca17_asimd0+ca17_simdfpadd0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_arith_q" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_arith_q"))
+  "(ca17_asimd0+ca17_simdfpadd0)*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_cvt_int" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_cvt_int"))
+  "ca17_asimd0+ca17_simdfpadd0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_cvt_int_q" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_cvt_int_q"))
+  "(ca17_asimd0+ca17_simdfpadd0)*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_cvt16" 10
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_cvt16"))
+  "ca17_asimd0+ca17_simdfpadd0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_mul" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_mul"))
+  "ca17_asimd0+ca17_simdfpmul0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_mul_q" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_mul_q"))
+  "(ca17_asimd0+ca17_simdfpmul0)*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_mla" 8
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_mla"))
+  "ca17_asimd0+ca17_simdfpmul0,ca17_simdfpadd0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_mla_q" 9
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_mla_q"))
+  "ca17_asimd0+ca17_simdfpmul0,ca17_asimd0+ca17_simdfpadd0+ca17_simdfpmul0,ca17_simdfpadd0")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_recps_rsqrte" 9
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_recpe_rsqrte"))
+  "(ca17_asimd0+ca17_perm0)|(ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation
+  "cortex_a17_neon_fp_recps_rsqrte_q" 9
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_fp_recpe_rsqrte_q"))
+  "(ca17_asimd0+ca17_perm0)*2|(ca17_asimd1+ca17_perm1)*2")
+
+;; Miscelaneous Instructions.
+
+(define_insn_reservation
+  "cortex_a17_neon_bitops" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_bitops"))
+  "(ca17_asimd0+ca17_perm0) | (ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation
+  "cortex_a17_neon_bitops_q" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_bitops_q"))
+  "(ca17_asimd0+ca17_perm0)*2 | (ca17_asimd1+ca17_perm1)*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_from_gp" 2
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_from_gp"))
+  "(ca17_asimd0+ca17_perm0)|(ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation
+  "cortex_a17_neon_from_gp_q" 3
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_from_gp_q"))
+  "(ca17_asimd0+ca17_perm0)|(ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation
+  "cortex_a17_neon_tbl3_tbl4" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_tbl3_tbl4"))
+  "(ca17_asimd0+ca17_perm0)|(ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation
+  "cortex_a17_neon_zip_q" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_zip_q"))
+  "(ca17_asimd0+ca17_perm0)|(ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation
+  "cortex_a17_neon_to_gp" 2
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_to_gp"))
+  "ca17_asimd0+ca17_perm0*3")
+
+(define_insn_reservation
+  "cortex_a17_vfp_flag" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "f_flag"))
+  "ca17_asimd0+ca17_perm0")
+
+;; Load Instructions.
+
+(define_insn_reservation
+  "cortex_a17_vfp_load" 5
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "f_loads, f_loadd"))
+  "ca17_ls0|ca17_ls1")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_a" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_a"))
+  "ca17_ls0*2|ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_b" 7
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_b"))
+  "ca17_ls0*2|ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_c" 8
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_c"))
+  "ca17_ls0*2|ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_d" 9
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_d"))
+  "ca17_ls0*2|ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_e" 9
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_e"))
+  "ca17_ls0*2|ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_f" 10
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_f"))
+  "ca17_ls0*2+ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_g" 10
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_g"))
+  "ca17_ls0*2+ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_load_h" 11
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_load_h"))
+  "ca17_ls0*2+ca17_ls1*2")
+
+;; Store Instructions.
+
+(define_insn_reservation
+  "cortex_a17_vfp_store" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "f_stores, f_stored"))
+  "ca17_ls0|ca17_ls1")
+
+
+(define_insn_reservation
+  "cortex_a17_neon_store_a" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_store_a"))
+  "ca17_ls0*2|ca17_ls1*2")
+
+(define_insn_reservation
+  "cortex_a17_neon_store_b" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "cortex_a17_neon_type" "neon_store_b"))
+  "ca17_ls0*2+ca17_ls1*2")
+
+;; VFP Operations.
+
+(define_insn_reservation "cortex_a17_vfp_const" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fconsts,fconstd"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_adds_subs" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fadds"))
+  "ca17_asimd1+ca17_fpadd1")
+
+
+(define_insn_reservation "cortex_a17_vfp_addd_subd" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "faddd"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_mul" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fmuls,fmuld"))
+  "ca17_asimd1+ca17_fpmul1")
+
+(define_insn_reservation "cortex_a17_vfp_mac" 11
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+  "ca17_asimd1+ca17_fpmul1,ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_cvt" 6
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f,f_rints,f_rintd"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_cmp" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fcmps,fcmpd"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_arithd" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "ffarithd"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_cpys" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fmov,fcsel"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_gp_to_vfp" 2
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "f_mcr, f_mcrr"))
+  "(ca17_asimd0+ca17_perm0)|(ca17_asimd1+ca17_perm1)")
+
+(define_insn_reservation "cortex_a17_mov_vfp_to_gp" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "f_mrc, f_mrrc"))
+  "ca17_asimd0+ca17_perm0*3")
+
+(define_insn_reservation "cortex_a17_vfp_ariths" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "ffariths"))
+  "ca17_asimd1+ca17_fpadd1")
+
+(define_insn_reservation "cortex_a17_vfp_divs" 18
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fdivs, fsqrts"))
+  "ca17_asimd0+ca17_fdiv0*10")
+
+(define_insn_reservation "cortex_a17_vfp_divd" 32
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "fdivd, fsqrtd"))
+  "ca17_asimd0+ca17_fdiv0*10")
+
--- a/src/gcc/config/arm/arm-fpus.def
+++ b/src/gcc/config/arm/arm-fpus.def
@@ -37,6 +37,8 @@
 ARM_FPU("vfpv4",	ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true, false)
 ARM_FPU("vfpv4-d16",	ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true, false)
 ARM_FPU("fpv4-sp-d16",	ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true, false)
+ARM_FPU("fpv5-sp-d16",	ARM_FP_MODEL_VFP, 5, VFP_REG_SINGLE, false, true, false)
+ARM_FPU("fpv5-d16",	ARM_FP_MODEL_VFP, 5, VFP_REG_D16, false, true, false)
 ARM_FPU("neon-vfpv4",	ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true, false)
 ARM_FPU("fp-armv8",	ARM_FP_MODEL_VFP, 8, VFP_REG_D32, false, true, false)
 ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, false)
--- a/src/gcc/config/arm/cortex-a53.md
+++ b/src/gcc/config/arm/cortex-a53.md
@@ -75,7 +75,7 @@
        (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                         alu_reg,alus_reg,logic_reg,logics_reg,\
                         adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                        adr,bfm,csel,rev,\
+                        adr,bfm,csel,clz,rbit,rev,\
                         shift_imm,shift_reg,\
                         mov_imm,mov_reg,mvn_imm,mvn_reg,\
                         mrs,multiple,no_insn"))
@@ -84,8 +84,8 @@
 (define_insn_reservation "cortex_a53_alu_shift" 2
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "alu_shift_imm,alus_shift_imm,\
-                        logic_shift_imm,logics_shift_imm,\
-                        alu_shift_reg,alus_shift_reg,\
+                        crc,logic_shift_imm,logics_shift_imm,\
+                        alu_ext,alus_ext,alu_shift_reg,alus_shift_reg,\
                         logic_shift_reg,logics_shift_reg,\
                         extend,mov_shift,mov_shift_reg,\
                         mvn_shift,mvn_shift_reg"))
@@ -216,7 +216,8 @@
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\
                         f_cvt,f_cvtf2i,f_cvti2f,\
-			fcmps, fcmpd, fcsel"))
+                        fcmps, fcmpd, fcsel, f_rints, f_rintd, f_minmaxs,\
+                        f_minmaxd"))
   "cortex_a53_slot0+cortex_a53_fpadd_pipe")
 
 (define_insn_reservation "cortex_a53_fconst" 2
--- a/src/gcc/config/arm/cortex-a17.md
+++ b/src/gcc/config/arm/cortex-a17.md
@@ -0,0 +1,169 @@
+;; ARM Cortex-A17 pipeline description
+;; Copyright (C) 2014 Free Software Foundation, Inc.
+;;
+;; Contributed by ARM Ltd.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_automaton "cortex_a17")
+
+(define_cpu_unit "ca17_ls0, ca17_ls1" "cortex_a17")
+(define_cpu_unit "ca17_alu0, ca17_alu1" "cortex_a17")
+(define_cpu_unit "ca17_mac" "cortex_a17")
+(define_cpu_unit "ca17_idiv" "cortex_a17")
+(define_cpu_unit "ca17_bx" "cortex_a17")
+
+(define_reservation "ca17_alu" "(ca17_alu0|ca17_alu1)")
+
+
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU
+(define_insn_reservation "cortex_a17_alu" 1
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
+                        alu_reg,alus_reg,logic_reg,logics_reg,\
+                        adc_imm,adcs_imm,adc_reg,adcs_reg,\
+                        adr, mov_imm,mov_reg,\
+                        mvn_imm,mvn_reg,extend,\
+                        mrs,multiple,no_insn"))
+  "ca17_alu")
+
+(define_insn_reservation "cortex_a17_alu_shiftimm" 2
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "bfm,clz,rev,rbit, alu_shift_imm, alus_shift_imm,
+                        logic_shift_imm,alu_reg,logics_shift_imm,shift_imm,\
+                        shift_reg, mov_shift,mvn_shift"))
+  "ca17_alu")
+
+
+;; ALU ops with register controlled shift.
+(define_insn_reservation "cortex_a17_alu_shift_reg" 2
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
+                        logic_shift_reg,logics_shift_reg"))
+  "ca17_alu0")
+
+
+;; Multiply Execution Unit:
+
+;; 32-bit multiplies
+(define_insn_reservation "cortex_a17_mult32" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "mul,muls,smmul,smmulr"))
+  "ca17_alu0+ca17_mac")
+
+(define_insn_reservation "cortex_a17_mac32" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "mla,mlas,smmla"))
+  "ca17_alu0+ca17_mac,ca17_mac")
+
+(define_insn_reservation "cortex_a17_mac32_other" 3
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "smlad,smladx,smlsd,smlsdx,smuad,smuadx,smusd,smusdx"))
+  "ca17_alu0+ca17_mac,ca17_mac")
+
+;; 64-bit multiplies
+(define_insn_reservation "cortex_a17_mac64" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "smlal,smlals,umaal,umlal,umlals"))
+  "ca17_alu0+ca17_mac,ca17_mac")
+
+(define_insn_reservation "cortex_a17_mac64_other" 3
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "smlald,smlalxy,smlsld"))
+  "ca17_alu0+ca17_mac,ca17_mac")
+
+(define_insn_reservation "cortex_a17_mult64" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "smull,smulls,umull,umulls"))
+  "ca17_alu0+ca17_mac,ca17_mac")
+
+
+(define_bypass 2 "cortex_a17_mult*, cortex_a17_mac*"
+                 "cortex_a17_mult*, cortex_a17_mac*"
+                 "arm_mac_accumulator_is_result")
+
+;; Integer divide
+(define_insn_reservation "cortex_a17_udiv" 19
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "udiv"))
+  "ca17_alu1+ca17_idiv*10")
+
+(define_insn_reservation "cortex_a17_sdiv" 20
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "sdiv"))
+  "ca17_alu1+ca17_idiv*11")
+
+
+
+;; Branch execution Unit
+;;
+;; Branches take one issue slot.
+;; No latency as there is no result
+(define_insn_reservation "cortex_a17_branch" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "branch"))
+  "ca17_bx")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "cortex_a17_load1" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "load_byte,load1,load2"))
+  "ca17_ls0|ca17_ls1")
+
+;; Loads of three words.
+(define_insn_reservation "cortex_a17_load3" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "load3"))
+  "ca17_ls0+ca17_ls1")
+
+;; Loads of four words.
+(define_insn_reservation "cortex_a17_load4" 4
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "load4"))
+  "ca17_ls0+ca17_ls1")
+
+;; Stores of up to two words.
+(define_insn_reservation "cortex_a17_store1" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "store1,store2"))
+  "ca17_ls0|ca17_ls1")
+
+;; Stores of three words
+(define_insn_reservation "cortex_a17_store3" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "store3"))
+  "ca17_ls0+ca17_ls1")
+
+;; Stores of four words.
+(define_insn_reservation "cortex_a17_store4" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "store4"))
+  "ca17_ls0+ca17_ls1")
+
+(define_insn_reservation "cortex_a17_call" 0
+  (and (eq_attr "tune" "cortexa17")
+       (eq_attr "type" "call"))
+  "ca17_bx")
+
+
+(include "../arm/cortex-a17-neon.md")
--- a/src/gcc/config/arm/cortex-a57.md
+++ b/src/gcc/config/arm/cortex-a57.md
@@ -0,0 +1,797 @@
+;; ARM Cortex-A57 pipeline description
+;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a57")
+
+(define_attr "cortex_a57_neon_type"
+  "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q,
+   neon_arith_basic, neon_arith_complex,
+   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+   neon_shift_imm_complex,
+   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+   neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+   neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+   neon_bitops, neon_bitops_q, neon_from_gp,
+   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+   neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+   unknown"
+  (cond [
+	  (eq_attr "type" "neon_abd, neon_abd_long")
+	    (const_string "neon_abd")
+	  (eq_attr "type" "neon_abd_q")
+	    (const_string "neon_abd_q")
+	  (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+			   neon_reduc_add_acc_q")
+	    (const_string "neon_arith_acc")
+	  (eq_attr "type" "neon_arith_acc_q")
+	    (const_string "neon_arith_acc_q")
+	  (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
+			   neon_add_widen, neon_neg, neon_neg_q,\
+			   neon_reduc_add, neon_reduc_add_q,\
+			   neon_reduc_add_long, neon_sub, neon_sub_q,\
+			   neon_sub_long, neon_sub_widen, neon_logic,\
+			   neon_logic_q, neon_tst, neon_tst_q")
+	    (const_string "neon_arith_basic")
+	  (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\
+			   neon_add_halve, neon_add_halve_q,\
+			   neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+			   neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+			   neon_qneg_q, neon_qsub, neon_qsub_q,\
+			   neon_sub_halve_narrow_q,\
+			   neon_compare, neon_compare_q,\
+			   neon_compare_zero, neon_compare_zero_q,\
+			   neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+			   neon_reduc_minmax_q")
+	    (const_string "neon_arith_complex")
+
+	  (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+			   neon_mul_h_scalar, neon_mul_s_scalar,\
+			   neon_sat_mul_b, neon_sat_mul_h,\
+			   neon_sat_mul_s, neon_sat_mul_h_scalar,\
+			   neon_sat_mul_s_scalar,\
+			   neon_mul_b_long, neon_mul_h_long,\
+			   neon_mul_s_long, neon_mul_d_long,\
+			   neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+			   neon_sat_mul_b_long, neon_sat_mul_h_long,\
+			   neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+			   neon_sat_mul_s_scalar_long")
+	    (const_string "neon_multiply")
+	  (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+			   neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+			   neon_sat_mul_b_q, neon_sat_mul_h_q,\
+			   neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+			   neon_sat_mul_s_scalar_q")
+	    (const_string "neon_multiply_q")
+	  (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
+			   neon_mla_h_scalar, neon_mla_s_scalar,\
+			   neon_mla_b_long, neon_mla_h_long,\
+			   neon_mla_s_long,\
+			   neon_mla_h_scalar_long, neon_mla_s_scalar_long")
+	    (const_string "neon_mla")
+	  (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+			   neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+	    (const_string "neon_mla_q")
+	  (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
+			   neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+			   neon_sat_mla_s_scalar_long")
+	    (const_string "neon_sat_mla_long")
+
+	  (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+	    (const_string "neon_shift_acc")
+	  (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+			   neon_shift_imm_narrow_q, neon_shift_imm_long")
+	    (const_string "neon_shift_imm_basic")
+	  (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+			   neon_sat_shift_imm_narrow_q")
+	    (const_string "neon_shift_imm_complex")
+	  (eq_attr "type" "neon_shift_reg")
+	    (const_string "neon_shift_reg_basic")
+	  (eq_attr "type" "neon_shift_reg_q")
+	    (const_string "neon_shift_reg_basic_q")
+	  (eq_attr "type" "neon_sat_shift_reg")
+	    (const_string "neon_shift_reg_complex")
+	  (eq_attr "type" "neon_sat_shift_reg_q")
+	    (const_string "neon_shift_reg_complex_q")
+
+	  (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+			   neon_fp_abs_s, neon_fp_abs_s_q,\
+			   neon_fp_neg_d, neon_fp_neg_d_q,\
+			   neon_fp_abs_d, neon_fp_abs_d_q")
+	    (const_string "neon_fp_negabs")
+	  (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+			   neon_fp_reduc_add_s, neon_fp_compare_s,\
+			   neon_fp_minmax_s, neon_fp_round_s,\
+			   neon_fp_addsub_d, neon_fp_abd_d,\
+			   neon_fp_reduc_add_d, neon_fp_compare_d,\
+			   neon_fp_minmax_d, neon_fp_round_d,\
+			   neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_d")
+	    (const_string "neon_fp_arith")
+	  (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+			   neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+			   neon_fp_minmax_s_q, neon_fp_round_s_q,\
+			   neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+			   neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+			   neon_fp_minmax_d_q, neon_fp_round_d_q")
+	    (const_string "neon_fp_arith_q")
+	  (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+			   neon_fp_reduc_minmax_d_q,\
+			   neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+	    (const_string "neon_fp_reductions_q")
+	  (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+			   neon_fp_to_int_d, neon_int_to_fp_d")
+	    (const_string "neon_fp_cvt_int")
+	  (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+			   neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+	    (const_string "neon_fp_cvt_int_q")
+	  (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+	    (const_string "neon_fp_cvt16")
+	  (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+			   neon_fp_mul_d")
+	    (const_string "neon_fp_mul")
+	  (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+			   neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+	    (const_string "neon_fp_mul_q")
+	  (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+			   neon_fp_mla_d")
+	    (const_string "neon_fp_mla")
+	  (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+			   neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+	    (const_string "neon_fp_mla_q")
+	  (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+			   neon_fp_recpx_s,\
+			   neon_fp_recpe_d, neon_fp_rsqrte_d,\
+			   neon_fp_recpx_d")
+	    (const_string "neon_fp_recpe_rsqrte")
+	  (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+			   neon_fp_recpx_s_q,\
+			   neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+			   neon_fp_recpx_d_q")
+	    (const_string "neon_fp_recpe_rsqrte_q")
+	  (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+			   neon_fp_recps_d, neon_fp_rsqrts_d")
+	    (const_string "neon_fp_recps_rsqrts")
+	  (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+			   neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+	    (const_string "neon_fp_recps_rsqrts_q")
+	  (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+			   neon_rev, neon_permute, neon_rbit,\
+			   neon_tbl1, neon_tbl2, neon_zip,\
+			   neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+			   neon_move, neon_move_q, neon_move_narrow_q")
+	    (const_string "neon_bitops")
+	  (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+			   neon_rev_q, neon_permute_q, neon_rbit_q")
+	    (const_string "neon_bitops_q")
+	  (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+	    (const_string "neon_from_gp")
+	  (eq_attr "type" "neon_from_gp_q")
+	    (const_string "neon_from_gp_q")
+	  (eq_attr "type" "neon_tbl3, neon_tbl4")
+	    (const_string "neon_tbl3_tbl4")
+	  (eq_attr "type" "neon_zip_q")
+	    (const_string "neon_zip_q")
+	  (eq_attr "type" "neon_to_gp, neon_to_gp_q,f_mrc,f_mrrc")
+	    (const_string "neon_to_gp")
+
+	  (eq_attr "type" "f_loads, f_loadd,\
+			   neon_load1_1reg, neon_load1_1reg_q,\
+			   neon_load1_2reg, neon_load1_2reg_q")
+	    (const_string "neon_load_a")
+	  (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+			   neon_load1_4reg, neon_load1_4reg_q")
+	    (const_string "neon_load_b")
+	  (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+			   neon_load1_all_lanes, neon_load1_all_lanes_q,\
+			   neon_load2_2reg, neon_load2_2reg_q,\
+			   neon_load2_all_lanes, neon_load2_all_lanes_q")
+	    (const_string "neon_load_c")
+	  (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+			   neon_load3_3reg, neon_load3_3reg_q,\
+			   neon_load3_one_lane, neon_load3_one_lane_q,\
+			   neon_load4_4reg, neon_load4_4reg_q")
+	    (const_string "neon_load_d")
+	  (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+			   neon_load3_all_lanes, neon_load3_all_lanes_q,\
+			   neon_load4_all_lanes, neon_load4_all_lanes_q")
+	    (const_string "neon_load_e")
+	  (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+	    (const_string "neon_load_f")
+
+	  (eq_attr "type" "f_stores, f_stored,\
+			   neon_store1_1reg")
+	    (const_string "neon_store_a")
+	  (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+	    (const_string "neon_store_b")
+	  (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+			   neon_store3_3reg, neon_store3_3reg_q,\
+			   neon_store2_4reg, neon_store2_4reg_q,\
+			   neon_store4_4reg, neon_store4_4reg_q,\
+			   neon_store2_2reg, neon_store2_2reg_q,\
+			   neon_store3_one_lane, neon_store3_one_lane_q,\
+			   neon_store4_one_lane, neon_store4_one_lane_q,\
+			   neon_store1_4reg, neon_store1_4reg_q,\
+			   neon_store1_one_lane, neon_store1_one_lane_q,\
+			   neon_store2_one_lane, neon_store2_one_lane_q")
+	    (const_string "neon_store_complex")]
+	  (const_string "unknown")))
+
+;; The Cortex-A57 core is modelled as a triple issue pipeline that has
+;; the following functional units.
+;; 1.  Two pipelines for integer operations: SX1, SX2
+
+(define_cpu_unit "ca57_sx1_issue" "cortex_a57")
+(define_reservation "ca57_sx1" "ca57_sx1_issue")
+
+(define_cpu_unit "ca57_sx2_issue" "cortex_a57")
+(define_reservation "ca57_sx2" "ca57_sx2_issue")
+
+;; 2.  One pipeline for complex integer operations: MX
+
+(define_cpu_unit "ca57_mx_issue"
+		 "cortex_a57")
+(define_reservation "ca57_mx" "ca57_mx_issue")
+(define_reservation "ca57_mx_block" "ca57_mx_issue")
+
+;; 3.  Two asymmetric pipelines for Neon and FP operations: CX1, CX2
+(define_automaton "cortex_a57_cx")
+
+(define_cpu_unit "ca57_cx1_issue"
+		 "cortex_a57_cx")
+(define_cpu_unit "ca57_cx2_issue"
+		 "cortex_a57_cx")
+
+(define_reservation "ca57_cx1" "ca57_cx1_issue")
+
+(define_reservation "ca57_cx2" "ca57_cx2_issue")
+(define_reservation "ca57_cx2_block" "ca57_cx2_issue*2")
+
+;; 4.  One pipeline for branch operations: BX
+
+(define_cpu_unit "ca57_bx_issue" "cortex_a57")
+(define_reservation "ca57_bx" "ca57_bx_issue")
+
+;; 5.  Two pipelines for load and store operations: LS1, LS2.  The most
+;;     valuable thing we can do is force a structural hazard to split
+;;     up loads/stores.
+
+(define_cpu_unit "ca57_ls_issue" "cortex_a57")
+(define_cpu_unit "ca57_ldr, ca57_str" "cortex_a57")
+(define_reservation "ca57_load_model" "ca57_ls_issue,ca57_ldr*2")
+(define_reservation "ca57_store_model" "ca57_ls_issue,ca57_str")
+
+;; Block all issue queues.
+
+(define_reservation "ca57_block" "ca57_cx1_issue + ca57_cx2_issue
+				  + ca57_mx_issue + ca57_sx1_issue
+				  + ca57_sx2_issue + ca57_ls_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "cortex_a57_alu" 2
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
+			alu_reg,alus_reg,logic_reg,logics_reg,\
+			adc_imm,adcs_imm,adc_reg,adcs_reg,\
+			adr,bfm,clz,rbit,rev,alu_reg,\
+			shift_imm,shift_reg,\
+			mov_imm,mov_reg,\
+			mvn_imm,mvn_reg,\
+			mrs,multiple,no_insn"))
+  "ca57_sx1|ca57_sx2")
+
+;; ALU ops with immediate shift
+(define_insn_reservation "cortex_a57_alu_shift" 3
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "extend,\
+			alu_shift_imm,alus_shift_imm,\
+			crc,logic_shift_imm,logics_shift_imm,\
+			mov_shift,mvn_shift"))
+  "ca57_mx")
+
+;; Multi-Cycle Execution Unit:
+;;
+;; ALU ops with register controlled shift
+(define_insn_reservation "cortex_a57_alu_shift_reg" 3
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
+			logic_shift_reg,logics_shift_reg,\
+			mov_shift_reg,mvn_shift_reg"))
+   "ca57_mx")
+
+;; All multiplies
+;; TODO: AArch32 and AArch64 have different behaviour
+(define_insn_reservation "cortex_a57_mult32" 3
+  (and (eq_attr "tune" "cortexa57")
+       (ior (eq_attr "mul32" "yes")
+	    (eq_attr "mul64" "yes")))
+  "ca57_mx")
+
+;; Integer divide
+(define_insn_reservation "cortex_a57_div" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "udiv,sdiv"))
+  "ca57_mx_issue,ca57_mx_block*3")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "cortex_a57_block" 1
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "block"))
+  "ca57_block")
+
+;; Branch execution Unit
+;;
+;; Branches take one issue slot.
+;; No latency as there is no result
+(define_insn_reservation "cortex_a57_branch" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "branch"))
+  "ca57_bx")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "cortex_a57_load1" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "load_byte,load1,load2"))
+  "ca57_load_model")
+
+;; Loads of three or four words.
+(define_insn_reservation "cortex_a57_load3" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "load3,load4"))
+  "ca57_ls_issue*2,ca57_load_model")
+
+;; Stores of up to two words.
+(define_insn_reservation "cortex_a57_store1" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "store1,store2"))
+  "ca57_store_model")
+
+;; Stores of three or four words.
+(define_insn_reservation "cortex_a57_store3" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "store3,store4"))
+  "ca57_ls_issue*2,ca57_store_model")
+
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
+
+(define_insn_reservation  "cortex_a57_neon_abd" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_abd"))
+  "ca57_cx1|ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_abd_q" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_abd_q"))
+  "ca57_cx1+ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_aba" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_acc"))
+  "ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_aba_q" 8
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_acc_q"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation  "cortex_a57_neon_arith_basic" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_basic"))
+  "ca57_cx1|ca57_cx2")
+
+(define_insn_reservation  "cortex_a57_neon_arith_complex" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_arith_complex"))
+  "ca57_cx1|ca57_cx2")
+
+;; Integer Multiply Instructions.
+
+(define_insn_reservation "cortex_a57_neon_multiply" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_multiply"))
+  "ca57_cx1")
+
+(define_insn_reservation "cortex_a57_neon_multiply_q" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_multiply_q"))
+  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")
+
+(define_insn_reservation "cortex_a57_neon_mla" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_mla"))
+  "ca57_cx1")
+
+(define_insn_reservation "cortex_a57_neon_mla_q" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_mla_q"))
+  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")
+
+(define_insn_reservation "cortex_a57_neon_sat_mla_long" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_sat_mla_long"))
+  "ca57_cx1")
+
+;; Integer Shift Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_acc" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_acc"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_imm_basic" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_imm_basic"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_imm_complex" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_imm_complex"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_basic" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_basic_q" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic_q"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_complex" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex"))
+  "ca57_cx2")
+
+(define_insn_reservation
+  "cortex_a57_neon_shift_reg_complex_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex_q"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+;; Floating Point Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_negabs" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_negabs"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_arith" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_arith"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_arith_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_arith_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_reductions_q" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_reductions_q"))
+  "(ca57_cx1+ca57_cx2),(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_cvt_int" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_cvt_int_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_cvt16" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt16"))
+  "(ca57_cx1_issue+ca57_cx2_issue),(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mul" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mul"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mul_q" 5
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mul_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mla" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mla"))
+  "(ca57_cx1,ca57_cx1)|(ca57_cx2,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_mla_q" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_mla_q"))
+  "(ca57_cx1+ca57_cx2),(ca57_cx1,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recpe_rsqrte" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recpe_rsqrte_q" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recps_rsqrts" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_fp_recps_rsqrts_q" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+;; Miscellaneous Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_bitops" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_bitops"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_bitops_q" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_bitops_q"))
+  "(ca57_cx1+ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_from_gp" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_from_gp"))
+  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
+	       |(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_from_gp_q" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_from_gp_q"))
+  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
+	       +(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_tbl3_tbl4" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_tbl3_tbl4"))
+  "(ca57_cx1_issue,ca57_cx1)
+	       +(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_zip_q" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_zip_q"))
+  "(ca57_cx1_issue,ca57_cx1)
+	       +(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_to_gp" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_to_gp"))
+  "((ca57_ls_issue+ca57_sx1_issue),ca57_sx1)
+   |((ca57_ls_issue+ca57_sx2_issue),ca57_sx2)")
+
+;; Load Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_load_a" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_a"))
+  "ca57_load_model")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_b" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_b"))
+  "ca57_ls_issue,ca57_ls_issue+ca57_ldr,ca57_ldr*2")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_c" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_c"))
+  "ca57_load_model+(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_d" 11
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_d"))
+  "ca57_cx1_issue+ca57_cx2_issue,
+   ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_e" 9
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_e"))
+  "ca57_load_model+(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation
+  "cortex_a57_neon_load_f" 11
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_load_f"))
+  "ca57_cx1_issue+ca57_cx2_issue,
+   ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")
+
+;; Store Instructions.
+
+(define_insn_reservation
+  "cortex_a57_neon_store_a" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_store_a"))
+  "ca57_store_model")
+
+(define_insn_reservation
+  "cortex_a57_neon_store_b" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_store_b"))
+  "ca57_store_model")
+
+;; These block issue for a number of cycles proportional to the number
+;; of 64-bit chunks they will store, we don't attempt to model that
+;; precisely, treat them as blocking execution for two cycles when
+;; issued.
+(define_insn_reservation
+  "cortex_a57_neon_store_complex" 0
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "cortex_a57_neon_type" "neon_store_complex"))
+  "ca57_block*2")
+
+;; Floating-Point Operations.
+
+(define_insn_reservation "cortex_a57_fp_const" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fconsts,fconstd"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_add_sub" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fadds,faddd"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_mul" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fmuls,fmuld"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_mac" 10
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+  "(ca57_cx1,nothing,nothing,ca57_cx1) \
+   |(ca57_cx2,nothing,nothing,ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_cvt" 6
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_cmp" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fcmps,fcmpd"))
+  "ca57_cx2")
+
+(define_insn_reservation "cortex_a57_fp_arith" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "ffariths,ffarithd"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_cpys" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fmov"))
+  "(ca57_cx1|ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_fp_divs" 12
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fdivs, fsqrts,\
+			neon_fp_div_s, neon_fp_sqrt_s"))
+  "ca57_cx2_block*5")
+
+(define_insn_reservation "cortex_a57_fp_divd" 16
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fdivd, fsqrtd, neon_fp_div_d, neon_fp_sqrt_d"))
+  "ca57_cx2_block*3")
+
+(define_insn_reservation "cortex_a57_neon_fp_div_q" 20
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "fdivd, fsqrtd,\
+			 neon_fp_div_s_q, neon_fp_div_d_q,\
+			 neon_fp_sqrt_s_q, neon_fp_sqrt_d_q"))
+  "ca57_cx2_block*3")
+
+(define_insn_reservation "cortex_a57_crypto_simple" 4
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "crypto_aese,crypto_aesmc,crypto_sha1_fast"))
+  "ca57_cx2")
+
+(define_insn_reservation "cortex_a57_crypto_complex" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "crypto_sha1_slow"))
+  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+
+(define_insn_reservation "cortex_a57_crypto_xor" 7
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "crypto_sha1_xor"))
+  "(ca57_cx1+ca57_cx2)")
+
+;; We lie with calls.  They take up all issue slots, but are otherwise
+;; not harmful.
+(define_insn_reservation "cortex_a57_call" 1
+  (and (eq_attr "tune" "cortexa57")
+       (eq_attr "type" "call"))
+  "ca57_sx1_issue+ca57_sx2_issue+ca57_cx1_issue+ca57_cx2_issue\
+    +ca57_mx_issue+ca57_bx_issue+ca57_ls_issue"
+)
+
+;; Simple execution unit bypasses
+(define_bypass 1 "cortex_a57_alu"
+	         "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
+(define_bypass 2 "cortex_a57_alu_shift"
+	         "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
+(define_bypass 2 "cortex_a57_alu_shift_reg"
+	         "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
+(define_bypass 1 "cortex_a57_alu" "cortex_a57_load1,cortex_a57_load3")
+(define_bypass 2 "cortex_a57_alu_shift" "cortex_a57_load1,cortex_a57_load3")
+(define_bypass 2 "cortex_a57_alu_shift_reg"
+	         "cortex_a57_load1,cortex_a57_load3")
+
+;; An MLA or a MUL can feed a dependent MLA.
+(define_bypass 5 "cortex_a57_neon_*mla*,cortex_a57_neon_*mul*"
+		 "cortex_a57_neon_*mla*")
+
+(define_bypass 5 "cortex_a57_fp_mul,cortex_a57_fp_mac"
+		 "cortex_a57_fp_mac")
+
+;; We don't need to care about control hazards, either the branch is
+;; predicted in which case we pay no penalty, or the branch is
+;; mispredicted in which case instruction scheduling will be unlikely to
+;; help.
+(define_bypass 1 "cortex_a57_*"
+		 "cortex_a57_call,cortex_a57_branch")
+
--- a/src/gcc/config/arm/bpabi.h
+++ b/src/gcc/config/arm/bpabi.h
@@ -64,16 +64,23 @@
   " %{!mlittle-endian:%{march=armv7-a|mcpu=cortex-a5    \
    |mcpu=cortex-a7                                      \
    |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15       \
-   |mcpu=cortex-a12					\
+   |mcpu=cortex-a12|mcpu=cortex-a17			\
    |mcpu=cortex-a15.cortex-a7				\
+   |mcpu=cortex-a17.cortex-a7				\
    |mcpu=marvell-pj4					\
    |mcpu=cortex-a53					\
    |mcpu=cortex-a57					\
    |mcpu=cortex-a57.cortex-a53				\
+   |mcpu=cortex-a72					\
+   |mcpu=cortex-a72.cortex-a53				\
+   |mcpu=xgene1                                         \
+   |mcpu=cortex-m1.small-multiply                       \
+   |mcpu=cortex-m0.small-multiply                       \
+   |mcpu=cortex-m0plus.small-multiply			\
    |mcpu=generic-armv7-a                                \
    |march=armv7ve	                                \
    |march=armv7-m|mcpu=cortex-m3                        \
-   |march=armv7e-m|mcpu=cortex-m4                       \
+   |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7        \
    |march=armv6-m|mcpu=cortex-m0                        \
    |march=armv8-a					\
    :%{!r:--be8}}}"
@@ -82,16 +89,23 @@
   " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5        \
    |mcpu=cortex-a7                                      \
    |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15       \
-   |mcpu=cortex-a12					\
+   |mcpu=cortex-a12|mcpu=cortex-a17			\
    |mcpu=cortex-a15.cortex-a7				\
+   |mcpu=cortex-a17.cortex-a7				\
    |mcpu=cortex-a53					\
    |mcpu=cortex-a57					\
    |mcpu=cortex-a57.cortex-a53				\
+   |mcpu=cortex-a72					\
+   |mcpu=cortex-a72.cortex-a53				\
+   |mcpu=xgene1                                         \
+   |mcpu=cortex-m1.small-multiply                       \
+   |mcpu=cortex-m0.small-multiply                       \
+   |mcpu=cortex-m0plus.small-multiply                   \
    |mcpu=marvell-pj4					\
    |mcpu=generic-armv7-a                                \
    |march=armv7ve	                                \
    |march=armv7-m|mcpu=cortex-m3                        \
-   |march=armv7e-m|mcpu=cortex-m4                       \
+   |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7        \
    |march=armv6-m|mcpu=cortex-m0                        \
    |march=armv8-a					\
    :%{!r:--be8}}}"
--- a/src/gcc/config/arm/iterators.md
+++ b/src/gcc/config/arm/iterators.md
@@ -116,6 +116,9 @@
 ;; Vector modes including 64-bit integer elements, but no floats.
 (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
 
+;; Vector modes for H, S and D types.
+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
+
 ;; Vector modes for float->int conversions.
 (define_mode_iterator VCVTF [V2SF V4SF])
 
@@ -191,6 +194,23 @@
 ;; Right shifts
 (define_code_iterator rshifts [ashiftrt lshiftrt])
 
+;; Iterator for integer conversions
+(define_code_iterator FIXUORS [fix unsigned_fix])
+
+;; Binary operators whose second operand can be shifted.
+(define_code_iterator shiftable_ops [plus minus ior xor and])
+
+;; plus and minus are the only shiftable_ops for which Thumb2 allows
+;; a stack pointer opoerand.  The minus operation is a candidate for an rsub
+;; and hence only plus is supported.
+(define_code_attr t2_binop0
+  [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")])
+
+;; The instruction to use when a shiftable_ops has a shift operation as
+;; its first operand.
+(define_code_attr arith_shift_insn
+  [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")])
+
 ;;----------------------------------------------------------------------------
 ;; Int iterators
 ;;----------------------------------------------------------------------------
@@ -198,9 +218,13 @@
 (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM
                             UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA])
 
+(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA])
+
 (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
                               UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN])
 
+(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA])
+
 (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
                           UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW])
 
@@ -502,6 +526,13 @@
 ;; Assembler mnemonics for signedness of widening operations.
 (define_code_attr US [(sign_extend "s") (zero_extend "u")])
 
+;; Signedness suffix for float->fixed conversions.  Empty for signed
+;; conversion.
+(define_code_attr su_optab [(fix "") (unsigned_fix "u")])
+
+;; Sign prefix to use in instruction type suffixes, i.e. s32, u32.
+(define_code_attr su [(fix "s") (unsigned_fix "u")])
+
 ;; Right shifts
 (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
 (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
--- a/src/gcc/config/arm/neon-testgen.ml
+++ b/src/gcc/config/arm/neon-testgen.ml
@@ -46,20 +46,19 @@
     failwith ("Could not create test source file " ^ name ^ ": " ^ str)
 
 (* Emit prologue code to a test source file.  *)
-let emit_prologue chan test_name effective_target =
+let emit_prologue chan test_name effective_target compile_test_optim =
   Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic.  */\n" test_name;
   Printf.fprintf chan "/* This file was autogenerated by neon-testgen.  */\n\n";
   Printf.fprintf chan "/* { dg-do assemble } */\n";
   Printf.fprintf chan "/* { dg-require-effective-target %s_ok } */\n"
                  effective_target;
-  Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n";
+  Printf.fprintf chan "/* { dg-options \"-save-temps %s\" } */\n" compile_test_optim;
   Printf.fprintf chan "/* { dg-add-options %s } */\n" effective_target;
-  Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n";
-  Printf.fprintf chan "void test_%s (void)\n{\n" test_name
+  Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"
 
-(* Emit declarations of local variables that are going to be passed
+(* Emit declarations of variables that are going to be passed
    to an intrinsic, together with one to take a returned value if needed.  *)
-let emit_automatics chan c_types features =
+let emit_variables chan c_types features spaces =
   let emit () =
     ignore (
       List.fold_left (fun arg_number -> fun (flags, ty) ->
@@ -69,8 +68,8 @@
                           (* Const arguments to builtins are directly
                              written in as constants.  *)
                           if not (List.mem Const flags) then
-                            Printf.fprintf chan "  %s %sarg%d_%s;\n"
-                                           ty pointer_bit arg_number ty;
+                            Printf.fprintf chan "%s%s %sarg%d_%s;\n"
+                                           spaces ty pointer_bit arg_number ty;
                         arg_number + 1)
                      0 (List.tl c_types))
   in
@@ -81,13 +80,13 @@
              allocation for vget_low tests or they fail because of copy
              elimination.  *)
           ((if List.mem Fixed_vector_reg features then
-              Printf.fprintf chan "  register %s out_%s asm (\"d18\");\n"
-                             return_ty return_ty
+              Printf.fprintf chan "%sregister %s out_%s asm (\"d18\");\n"
+                             spaces return_ty return_ty
             else if List.mem Fixed_core_reg features then
-              Printf.fprintf chan "  register %s out_%s asm (\"r0\");\n"
-                             return_ty return_ty
+              Printf.fprintf chan "%sregister %s out_%s asm (\"r0\");\n"
+                             spaces return_ty return_ty
             else
-              Printf.fprintf chan "  %s out_%s;\n" return_ty return_ty);
+              Printf.fprintf chan "%s%s out_%s;\n" spaces return_ty return_ty);
 	   emit ())
         end else
           (* The intrinsic does not return a value.  *)
@@ -173,6 +172,17 @@
     | _ -> assert false
   with Not_found -> "arm_neon"
 
+(* Work out what the testcase optimization level should be, default to -O0.  *)
+let compile_test_optim features =
+  try
+    match List.find (fun feature ->
+                       match feature with Compiler_optim _ -> true
+                                        | _ -> false)
+                     features with
+      Compiler_optim opt -> opt
+    | _ -> assert false
+  with Not_found -> "-O0"
+
 (* Given an intrinsic shape, produce a regexp that will match
    the right-hand sides of instructions generated by an intrinsic of
    that shape.  *)
@@ -280,12 +290,22 @@
 			  "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n")
                          (analyze_all_shapes features shape analyze_shape)
   in
-  let effective_target = effective_target features
+  let effective_target = effective_target features in
+  let compile_test_optim = compile_test_optim features
   in
     (* Emit file and function prologues.  *)
-    emit_prologue chan test_name effective_target;
-    (* Emit local variable declarations.  *)
-    emit_automatics chan c_types features;
+    emit_prologue chan test_name effective_target compile_test_optim;
+
+    if (compare compile_test_optim "-O0") <> 0 then
+        (* Emit variable declarations.  *)
+        emit_variables chan c_types features "";
+
+    Printf.fprintf chan "void test_%s (void)\n{\n" test_name;
+
+    if compare compile_test_optim "-O0" = 0 then
+        (* Emit variable declarations.  *)
+        emit_variables chan c_types features "  ";
+
     Printf.fprintf chan "\n";
     (* Emit the call to the intrinsic.  *)
     emit_call chan const_valuator c_types name elt_ty;
--- a/src/gcc/config/arm/arm.md
+++ b/src/gcc/config/arm/arm.md
@@ -109,6 +109,11 @@
 ;; given instruction does not shift one of its input operands.
 (define_attr "shift" "" (const_int 0))
 
+;; [For compatibility with AArch64 in pipeline models]
+;; Attribute that specifies whether or not the instruction touches fp
+;; registers.
+(define_attr "fp" "no,yes" (const_string "no"))
+
 ; Floating Point Unit.  If we only have floating point emulation, then there
 ; is no point in scheduling the floating point insns.  (Well, for best
 ; performance we should try and group them together).
@@ -205,17 +210,9 @@
 	  (const_string "yes")]
 	 (const_string "no")))
 
-; Allows an insn to disable certain alternatives for reasons other than
-; arch support.
-(define_attr "insn_enabled" "no,yes"
-  (const_string "yes"))
-
 ; Enable all alternatives that are both arch_enabled and insn_enabled.
  (define_attr "enabled" "no,yes"
-   (cond [(eq_attr "insn_enabled" "no")
-	  (const_string "no")
-
-	  (and (eq_attr "predicable_short_it" "no")
+   (cond [(and (eq_attr "predicable_short_it" "no")
 	       (and (eq_attr "predicated" "yes")
 	            (match_test "arm_restrict_it")))
 	  (const_string "no")
@@ -377,7 +374,12 @@
 
 (define_attr "generic_sched" "yes,no"
   (const (if_then_else
-          (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa12,cortexa15,cortexa53,cortexm4,marvell_pj4")
+          (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,\
+                                arm926ejs,arm1020e,arm1026ejs,arm1136js,\
+                                arm1136jfs,cortexa5,cortexa7,cortexa8,\
+                                cortexa9,cortexa12,cortexa15,cortexa17,\
+                                cortexa53,cortexa57,cortexm4,cortexm7,\
+				marvell_pj4,xgene1")
 	       (eq_attr "tune_cortexr4" "yes"))
           (const_string "no")
           (const_string "yes"))))
@@ -385,7 +387,9 @@
 (define_attr "generic_vfp" "yes,no"
   (const (if_then_else
 	  (and (eq_attr "fpu" "vfp")
-	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4")
+	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,\
+                                cortexa8,cortexa9,cortexa53,cortexm4,\
+                                cortexm7,marvell_pj4,xgene1")
 	       (eq_attr "tune_cortexr4" "no"))
 	  (const_string "yes")
 	  (const_string "no"))))
@@ -406,13 +410,17 @@
 (include "cortex-a8.md")
 (include "cortex-a9.md")
 (include "cortex-a15.md")
+(include "cortex-a17.md")
 (include "cortex-a53.md")
+(include "cortex-a57.md")
 (include "cortex-r4.md")
 (include "cortex-r4f.md")
+(include "cortex-m7.md")
 (include "cortex-m4.md")
 (include "cortex-m4-fpu.md")
 (include "vfp11.md")
 (include "marvell-pj4.md")
+(include "xgene1.md")
 
 
 ;;---------------------------------------------------------------------------
@@ -2868,6 +2876,28 @@
    (set_attr "type" "multiple")]
 )
 
+(define_insn_and_split "*anddi_notdi_zesidi"
+  [(set (match_operand:DI 0 "s_register_operand" "=r")
+        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r"))
+                (zero_extend:DI
+                 (match_operand:SI 1 "s_register_operand" "r"))))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"
+  [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (const_int 0))]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+  [(set_attr "length" "8")
+   (set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "multiple")]
+)
+
 (define_insn_and_split "*anddi_notsesidi_di"
   [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
 	(and:DI (not:DI (sign_extend:DI
@@ -8906,7 +8936,7 @@
     return \"\";
   }"
   [(set_attr "conds" "use")
-   (set_attr "type" "f_sel<vfp_type>")]
+   (set_attr "type" "fcsel")]
 )
 
 (define_insn_and_split "*movsicc_insn"
@@ -9343,8 +9373,10 @@
   "TARGET_32BIT"
   "
   {
-    if (!REG_P (XEXP (operands[0], 0))
-       && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
+    if ((!REG_P (XEXP (operands[0], 0))
+	 && GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)
+	|| (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+	    && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[0], 0)))))
      XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0));
 
     if (operands[2] == NULL_RTX)
@@ -9361,8 +9393,10 @@
   "TARGET_32BIT"
   "
   {
-    if (!REG_P (XEXP (operands[1], 0)) &&
-       (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF))
+    if ((!REG_P (XEXP (operands[1], 0))
+	 && GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF)
+	|| (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+	    && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[1], 0)))))
      XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0));
 
     if (operands[3] == NULL_RTX)
@@ -9848,39 +9882,35 @@
 
 ;; Patterns to allow combination of arithmetic, cond code and shifts
 
-(define_insn "*arith_shiftsi"
-  [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
-        (match_operator:SI 1 "shiftable_operator"
-          [(match_operator:SI 3 "shift_operator"
-             [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
-              (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
-           (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
+(define_insn "*<arith_shift_insn>_multsi"
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+	(shiftable_ops:SI
+	 (mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
+		  (match_operand:SI 3 "power_of_two_operand" ""))
+	 (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>")))]
   "TARGET_32BIT"
-  "%i1%?\\t%0, %2, %4%S3"
+  "<arith_shift_insn>%?\\t%0, %1, %2, lsl %b3"
   [(set_attr "predicable" "yes")
    (set_attr "predicable_short_it" "no")
-   (set_attr "shift" "4")
-   (set_attr "arch" "a,t2,t2,a")
-   ;; Thumb2 doesn't allow the stack pointer to be used for 
-   ;; operand1 for all operations other than add and sub. In this case 
-   ;; the minus operation is a candidate for an rsub and hence needs
-   ;; to be disabled.
-   ;; We have to make sure to disable the fourth alternative if
-   ;; the shift_operator is MULT, since otherwise the insn will
-   ;; also match a multiply_accumulate pattern and validate_change
-   ;; will allow a replacement of the constant with a register
-   ;; despite the checks done in shift_operator.
-   (set_attr_alternative "insn_enabled"
-			 [(const_string "yes")
-			  (if_then_else
-			   (match_operand:SI 1 "add_operator" "")
-			   (const_string "yes") (const_string "no"))
-			  (const_string "yes")
-			  (if_then_else
-			   (match_operand:SI 3 "mult_operator" "")
-			   (const_string "no") (const_string "yes"))])
-   (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")])
+   (set_attr "shift" "2")
+   (set_attr "arch" "a,t2")
+   (set_attr "type" "alu_shift_imm")])
 
+(define_insn "*<arith_shift_insn>_shiftsi"
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+	(shiftable_ops:SI
+	 (match_operator:SI 2 "shift_nomul_operator"
+	  [(match_operand:SI 3 "s_register_operand" "r,r,r")
+	   (match_operand:SI 4 "shift_amount_operand" "M,M,r")])
+	 (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>,rk")))]
+  "TARGET_32BIT && GET_CODE (operands[2]) != MULT"
+  "<arith_shift_insn>%?\\t%0, %1, %3%S2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "shift" "3")
+   (set_attr "arch" "a,t2,a")
+   (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_reg")])
+
 (define_split
   [(set (match_operand:SI 0 "s_register_operand" "")
 	(match_operator:SI 1 "shiftable_operator"
@@ -12169,7 +12199,7 @@
     int num_regs = XVECLEN (operands[0], 0);
     char pattern[100];
     rtx op_list[2];
-    strcpy (pattern, \"fldmfdd\\t\");
+    strcpy (pattern, \"vldm\\t\");
     strcat (pattern, reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
     strcat (pattern, \"!, {\");
     op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0);
@@ -12379,6 +12409,7 @@
   "TARGET_32BIT && arm_arch5"
   "clz%?\\t%0, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "clz")])
 
 (define_insn "rbitsi2"
@@ -12387,6 +12418,7 @@
   "TARGET_32BIT && arm_arch_thumb2"
   "rbit%?\\t%0, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "clz")])
 
 (define_expand "ctzsi2"
@@ -12562,6 +12594,8 @@
    rev%?\t%0, %1"
   [(set_attr "arch" "t1,t2,32")
    (set_attr "length" "2,2,4")
+   (set_attr "predicable" "no,yes,yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "rev")]
 )
 
@@ -12679,6 +12713,44 @@
    (set_attr "type" "rev")]
 )
 
+;; There are no canonicalisation rules for the position of the lshiftrt, ashift
+;; operations within an IOR/AND RTX, therefore we have two patterns matching
+;; each valid permutation.
+
+(define_insn "arm_rev16si2"
+  [(set (match_operand:SI 0 "register_operand" "=l,l,r")
+        (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r")
+                                   (const_int 8))
+                        (match_operand:SI 3 "const_int_operand" "n,n,n"))
+                (and:SI (lshiftrt:SI (match_dup 1)
+                                     (const_int 8))
+                        (match_operand:SI 2 "const_int_operand" "n,n,n"))))]
+  "arm_arch6
+   && aarch_rev16_shleft_mask_imm_p (operands[3], SImode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], SImode)"
+  "rev16\\t%0, %1"
+  [(set_attr "arch" "t1,t2,32")
+   (set_attr "length" "2,2,4")
+   (set_attr "type" "rev")]
+)
+
+(define_insn "arm_rev16si2_alt"
+  [(set (match_operand:SI 0 "register_operand" "=l,l,r")
+        (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r")
+                                     (const_int 8))
+                        (match_operand:SI 2 "const_int_operand" "n,n,n"))
+                (and:SI (ashift:SI (match_dup 1)
+                                   (const_int 8))
+                        (match_operand:SI 3 "const_int_operand" "n,n,n"))))]
+  "arm_arch6
+   && aarch_rev16_shleft_mask_imm_p (operands[3], SImode)
+   && aarch_rev16_shright_mask_imm_p (operands[2], SImode)"
+  "rev16\\t%0, %1"
+  [(set_attr "arch" "t1,t2,32")
+   (set_attr "length" "2,2,4")
+   (set_attr "type" "rev")]
+)
+
 (define_expand "bswaphi2"
   [(set (match_operand:HI 0 "s_register_operand" "=r")
 	(bswap:HI (match_operand:HI 1 "s_register_operand" "r")))]
--- a/src/gcc/config/arm/xgene1.md
+++ b/src/gcc/config/arm/xgene1.md
@@ -0,0 +1,531 @@
+;; Machine description for AppliedMicro xgene1 core.
+;; Copyright (C) 2012-2015 Free Software Foundation, Inc.
+;; Contributed by Theobroma Systems Design und Consulting GmbH.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Pipeline description for the xgene1 micro-architecture
+
+(define_automaton "xgene1")
+
+(define_cpu_unit "xgene1_decode_out0" "xgene1")
+(define_cpu_unit "xgene1_decode_out1" "xgene1")
+(define_cpu_unit "xgene1_decode_out2" "xgene1")
+(define_cpu_unit "xgene1_decode_out3" "xgene1")
+
+(define_cpu_unit "xgene1_divide" "xgene1")
+(define_cpu_unit "xgene1_fp_divide" "xgene1")
+(define_cpu_unit "xgene1_fsu" "xgene1")
+(define_cpu_unit "xgene1_fcmp" "xgene1")
+
+(define_reservation "xgene1_decode1op"
+        "( xgene1_decode_out0 )
+        |( xgene1_decode_out1 )
+        |( xgene1_decode_out2 )
+        |( xgene1_decode_out3 )"
+)
+(define_reservation "xgene1_decode2op"
+        "( xgene1_decode_out0 + xgene1_decode_out1 )
+        |( xgene1_decode_out0 + xgene1_decode_out2 )
+        |( xgene1_decode_out0 + xgene1_decode_out3 )
+        |( xgene1_decode_out1 + xgene1_decode_out2 )
+        |( xgene1_decode_out1 + xgene1_decode_out3 )
+        |( xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+(define_reservation "xgene1_decodeIsolated"
+        "( xgene1_decode_out0 + xgene1_decode_out1 + xgene1_decode_out2 + xgene1_decode_out3 )"
+)
+
+(define_insn_reservation "xgene1_branch" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "branch"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_nop" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "no_insn"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_call" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "call"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_f_load" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_loadd,f_loads"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_f_store" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_stored,f_stores"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_fmov" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fmov,fconsts,fconstd"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_f_mcr" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mcr"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_f_mrc" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mrc"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_load_pair" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load2"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_store_pair" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store2"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_fp_load1" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load1")
+       (eq_attr "fp" "yes"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_load1" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load1"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_store1" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store1"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_move" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mov_reg,mov_imm,mrs"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_alu" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alu_imm,alu_reg,alu_shift_imm,\
+                        alu_ext,adc_reg,csel,logic_imm,\
+                        logic_reg,logic_shift_imm,clz,\
+                        rbit,shift_reg,adr,mov_reg,\
+                        mov_imm,extend"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_simd" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "rev"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_alus" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alus_imm,alu_reg,alus_shift_imm,\
+                        alus_ext,logics_imm,logics_reg,\
+                        logics_shift_imm"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_mul" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mul,mla,smull,umull,smlal,umlal"))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_div" 34
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "sdiv,udiv"))
+  "xgene1_decode1op,xgene1_divide*7")
+
+(define_insn_reservation "xgene1_fcmp" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcmpd,fcmps"))
+  "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3")
+
+(define_insn_reservation "xgene1_fcsel" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcsel"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_bfm" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "bfm"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_rint" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_rintd,f_rints"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvt" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvt"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvtf2i" 11
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvtf2i"))
+  "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_cvti2f" 14
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvti2f"))
+  "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "faddd,fadds,fmuld,fmuls"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_divs" 22
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fdivs,fsqrts"))
+  "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*8,xgene1_fp_divide*14")
+
+(define_insn_reservation "xgene1_f_divd" 28
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fdivd"))
+  "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*11,xgene1_fp_divide*17")
+
+(define_insn_reservation "xgene1_f_sqrtd" 28
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fsqrtd"))
+  "xgene1_decode1op,(xgene1_fp_divide+xgene1_fsu)*17,xgene1_fp_divide*11")
+
+(define_insn_reservation "xgene1_f_arith" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "ffarithd,ffariths"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_f_select" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_minmaxd,f_minmaxs"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_dup" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_dup,neon_dup_q"))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_load1" 11
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q"))
+  "xgene1_decode2op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_store1" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q"))
+  "xgene1_decode2op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_logic" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_logic,\
+                        neon_logic_q,\
+                        neon_bsl,\
+                        neon_bsl_q,\
+                        neon_move,\
+                        neon_move_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_umov" 7
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_to_gp, neon_to_gp_q"))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_ins" 14
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_from_gp,\
+                        neon_from_gp_q,\
+                        neon_ins,\
+                        neon_ins_q,\
+                       "))
+  "xgene1_decodeIsolated,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_shift" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_imm,\
+                        neon_shift_imm_q,\
+                        neon_shift_reg,\
+                        neon_shift_reg_q,\
+                        neon_shift_imm_long,\
+                        neon_sat_shift_imm,\
+                        neon_sat_shift_imm_q,\
+                        neon_sat_shift_imm_narrow_q,\
+                        neon_sat_shift_reg,\
+                        neon_sat_shift_reg_q,\
+                        neon_shift_imm_narrow_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_arith" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_add,\
+                        neon_add_q,\
+                        neon_sub,\
+                        neon_sub_q,\
+                        neon_neg,\
+                        neon_neg_q,\
+                        neon_abs,\
+                        neon_abs_q,\
+                        neon_abd_q,\
+                        neon_arith_acc,\
+                        neon_arith_acc_q,\
+                        neon_reduc_add,\
+                        neon_reduc_add_q,\
+                        neon_add_halve,\
+                        neon_add_halve_q,\
+                        neon_sub_halve,\
+                        neon_sub_halve_q,\
+                        neon_qadd,\
+                        neon_qadd_q,\
+                        neon_compare,\
+                        neon_compare_q,\
+                        neon_compare_zero,\
+                        neon_compare_zero_q,\
+                        neon_tst,\
+                        neon_tst_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_abs_diff" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_arith_acc,neon_arith_acc_q"))
+  "xgene1_decode2op,xgene1_fsu*2")
+
+(define_insn_reservation "xgene1_neon_mul" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_b,\
+                        neon_mul_b_q,\
+                        neon_mul_h,\
+                        neon_mul_h_q,\
+                        neon_mul_s,\
+                        neon_mul_s_q,\
+                        neon_fp_mul_s_scalar,\
+                        neon_fp_mul_s_scalar_q,\
+                        neon_fp_mul_d_scalar_q,\
+                        neon_mla_b,neon_mla_b_q,\
+                        neon_mla_h,neon_mla_h_q,\
+                        neon_mla_s,neon_mla_s_q,\
+                        neon_mla_h_scalar,\
+                        neon_mla_h_scalar_q,\
+                        neon_mla_s_scalar,\
+                        neon_mla_s_scalar_q,\
+                        neon_mla_b_long,\
+                        neon_mla_h_long,\
+                        neon_mla_s_long,\
+                        neon_fp_mul_s,\
+                        neon_fp_mul_s_q,\
+                        neon_fp_mul_d,\
+                        neon_fp_mul_d_q,\
+                        neon_fp_mla_s,\
+                        neon_fp_mla_s_q,\
+                        neon_fp_mla_d,\
+                        neon_fp_mla_d_q,\
+                        neon_fp_mla_s_scalar,\
+                        neon_fp_mla_s_scalar_q,\
+                        neon_fp_mla_d_scalar_q,\
+                        neon_sat_mul_b,\
+                        neon_sat_mul_b_q,\
+                        neon_sat_mul_h,\
+                        neon_sat_mul_h_q,\
+                        neon_sat_mul_s,\
+                        neon_sat_mul_s_q,\
+                        neon_sat_mul_h_scalar,\
+                        neon_sat_mul_h_scalar_q,\
+                        neon_sat_mul_s_scalar,\
+                        neon_sat_mul_s_scalar_q,\
+                        neon_sat_mul_h_scalar_long,\
+                        neon_sat_mul_s_scalar_long,\
+                        neon_sat_mla_b_long,\
+                        neon_sat_mla_h_long,\
+                        neon_sat_mla_s_long,\
+                        neon_sat_mla_h_scalar_long,\
+                        neon_sat_mla_s_scalar_long,\
+                       "))
+  "xgene1_decode2op,xgene1_fsu*2")
+
+(define_insn_reservation "xgene1_fp_abd_diff" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_abd_s,\
+                        neon_fp_abd_s_q,\
+                        neon_fp_abd_d,\
+                        neon_fp_abd_d_q,\
+                       "))
+  "xgene1_decode1op,xgene1_fsu")
+
+(define_insn_reservation "xgene1_neon_f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_addsub_s,\
+                        neon_fp_addsub_s_q,\
+                        neon_fp_addsub_d,\
+                        neon_fp_addsub_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_div" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_div_s,\
+                        neon_fp_div_s_q,\
+                        neon_fp_div_d,\
+                        neon_fp_div_d_q,\
+                       "))
+  "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)")
+
+(define_insn_reservation "xgene1_neon_f_neg" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_neg_s,\
+                        neon_fp_neg_s_q,\
+                        neon_fp_neg_d,\
+                        neon_fp_neg_d_q,\
+                        neon_fp_abs_s,\
+                        neon_fp_abs_s_q,\
+                        neon_fp_abs_d,\
+                        neon_fp_abs_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_round" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_round_s,\
+                        neon_fp_round_s_q,\
+                        neon_fp_round_d,\
+                        neon_fp_round_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_cvt" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type"  "neon_int_to_fp_s,\
+                         neon_int_to_fp_s_q,\
+                         neon_int_to_fp_d,\
+                         neon_int_to_fp_d_q,\
+                         neon_fp_cvt_widen_s,\
+                         neon_fp_cvt_narrow_s_q,\
+                         neon_fp_cvt_narrow_d_q,\
+                        "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_f_reduc" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_reduc_add_s,\
+                        neon_fp_reduc_add_s_q,\
+                        neon_fp_reduc_add_d,\
+                        neon_fp_reduc_add_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_cls" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_cls,neon_cls_q"))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_st1" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_one_lane,\
+                        neon_store1_one_lane_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_halve_narrow" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_sub_halve_narrow_q,\
+                        neon_add_halve_narrow_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_shift_acc" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_acc,\
+                        neon_shift_acc_q,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_neon_fp_compare" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_compare_s,\
+                        neon_fp_compare_s_q,\
+                        neon_fp_compare_d,\
+                        neon_fp_compare_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_fp_sqrt" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_sqrt_s,\
+                        neon_fp_sqrt_s_q,\
+                        neon_fp_sqrt_d,\
+                        neon_fp_sqrt_d_q,\
+                       "))
+  "xgene1_decode1op,(xgene1_fsu+xgene1_fp_divide)")
+
+(define_insn_reservation "xgene1_neon_tbl1" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl1,\
+                        neon_tbl1_q,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_neon_tbl2" 8
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl2,\
+                        neon_tbl2_q,\
+                       "))
+  "xgene1_decodeIsolated")
+
+(define_insn_reservation "xgene1_neon_permute" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_permute,\
+                        neon_permute_q,\
+                       "))
+  "xgene1_decode2op")
+
+(define_insn_reservation "xgene1_neon_ld1r" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_all_lanes,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_fp_recp" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recpe_s,\
+                        neon_fp_recpe_s_q,\
+                        neon_fp_recpe_d,\
+                        neon_fp_recpe_d_q,\
+                        neon_fp_recpx_s,\
+                        neon_fp_recpx_s_q,\
+                        neon_fp_recpx_d,\
+                        neon_fp_recpx_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+
+(define_insn_reservation "xgene1_neon_fp_recp_s" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recps_s,\
+                        neon_fp_recps_s_q,\
+                        neon_fp_recps_d,\
+                        neon_fp_recps_d_q,\
+                       "))
+  "xgene1_decode1op")
+
+(define_insn_reservation "xgene1_neon_pmull" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_d_long,\
+                       "))
+  "xgene1_decode2op")
--- a/src/gcc/config/arm/driver-arm.c
+++ b/src/gcc/config/arm/driver-arm.c
@@ -41,6 +41,7 @@
     {"0xc08", "armv7-a", "cortex-a8"},
     {"0xc09", "armv7-a", "cortex-a9"},
     {"0xc0d", "armv7ve", "cortex-a12"},
+    {"0xc0e", "armv7ve", "cortex-a17"},
     {"0xc0f", "armv7ve", "cortex-a15"},
     {"0xc14", "armv7-r", "cortex-r4"},
     {"0xc15", "armv7-r", "cortex-r5"},
--- a/src/gcc/config/arm/cortex-a5.md
+++ b/src/gcc/config/arm/cortex-a5.md
@@ -61,7 +61,7 @@
        (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                         alu_reg,alus_reg,logic_reg,logics_reg,\
                         adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                        adr,bfm,rev,\
+                        adr,bfm,clz,rbit,rev,\
                         shift_imm,shift_reg,\
                         mov_imm,mov_reg,mvn_imm,mvn_reg,\
                         mrs,multiple,no_insn"))
--- a/src/gcc/config/arm/cortex-a9.md
+++ b/src/gcc/config/arm/cortex-a9.md
@@ -83,7 +83,7 @@
        (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                         alu_reg,alus_reg,logic_reg,logics_reg,\
                         adc_imm,adcs_imm,adc_reg,adcs_reg,\
-                        adr,bfm,rev,\
+                        adr,bfm,clz,rbit,rev,\
                         shift_imm,shift_reg,\
                         mov_imm,mov_reg,mvn_imm,mvn_reg,\
                         mov_shift_reg,mov_shift,\
--- a/src/gcc/config/mips/mips.c
+++ b/src/gcc/config/mips/mips.c
@@ -7197,12 +7197,17 @@
   emit_insn (gen_slt_sf (dest, fp2, fp1));
 }
 
-/* Implement MOVE_BY_PIECES_P.  */
+/* Implement TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P.  */
 
 bool
-mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
+mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+				     unsigned int align,
+				     enum by_pieces_operation op,
+				     bool speed_p)
 {
-  if (HAVE_movmemsi)
+  if (op == STORE_BY_PIECES)
+    return mips_store_by_pieces_p (size, align);
+  if (op == MOVE_BY_PIECES && HAVE_movmemsi)
     {
       /* movmemsi is meant to generate code that is at least as good as
 	 move_by_pieces.  However, movmemsi effectively uses a by-pieces
@@ -7219,13 +7224,12 @@
 	return size < UNITS_PER_WORD;
       return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
     }
-  /* The default value.  If this becomes a target hook, we should
-     call the default definition instead.  */
-  return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
-	  < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()));
+
+  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
 }
 
-/* Implement STORE_BY_PIECES_P.  */
+/* Implement a handler for STORE_BY_PIECES operations
+   for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P.  */
 
 bool
 mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
@@ -19134,6 +19138,10 @@
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV mips_atomic_assign_expand_fenv
 
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+  mips_use_by_pieces_infrastructure_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-mips.h"
--- a/src/gcc/config/mips/mips.h
+++ b/src/gcc/config/mips/mips.h
@@ -2884,9 +2884,6 @@
    ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX		\
    : MIPS_CALL_RATIO / 2)
 
-#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  mips_move_by_pieces_p (SIZE, ALIGN)
-
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
    of the length of a memset call, but use the default otherwise.  */
 
@@ -2899,9 +2896,6 @@
 
 #define SET_RATIO(speed) \
   ((speed) ? 15 : MIPS_CALL_RATIO - 2)
-
-#define STORE_BY_PIECES_P(SIZE, ALIGN) \
-  mips_store_by_pieces_p (SIZE, ALIGN)
 
 /* Since the bits of the _init and _fini function is spread across
    many object files, each potentially with its own GP, we must assume
--- a/src/gcc/params.def
+++ b/src/gcc/params.def
@@ -303,7 +303,7 @@
 DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
 	"max-completely-peeled-insns",
 	"The maximum number of insns of a completely peeled loop",
-	100, 0, 0)
+	200, 0, 0)
 /* The maximum number of peelings of a single loop that is peeled completely.  */
 DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
 	"max-completely-peel-times",
@@ -668,6 +668,11 @@
 	  "Minimal distance between possibly conflicting store and load",
 	  1, 0, 0)
 
+DEFPARAM (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
+	  "sched-autopref-queue-depth",
+	  "Hardware autoprefetcher scheduler model control flag.  Number of lookahead cycles the model looks into; at '0' only enable instruction sorting heuristic.  Disabled by default.",
+	  -1, 0, 0)
+
 DEFPARAM(PARAM_MAX_LAST_VALUE_RTL,
 	 "max-last-value-rtl",
 	 "The maximum number of RTL nodes that can be recorded as combiner's last value",
@@ -1095,6 +1100,21 @@
 	  "Maximum number of nested calls to search for control dependencies "
 	  "during uninitialized variable analysis",
 	  1000, 1, 0)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
+	  "max-fsm-thread-path-insns",
+	  "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path",
+	  100, 1, 999999)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_LENGTH,
+	  "max-fsm-thread-length",
+	  "Maximum number of basic blocks on a finite state automaton jump thread path",
+	  10, 1, 999999)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_PATHS,
+	  "max-fsm-thread-paths",
+	  "Maximum number of new jump thread paths to create for a finite state automaton",
+	  50, 1, 999999)
 /*
 
 Local variables:
--- a/src/gcc/tree-ssa-threadedge.c
+++ b/src/gcc/tree-ssa-threadedge.c
@@ -48,6 +48,7 @@
 #include "langhooks.h"
 #include "params.h"
 #include "tree-ssa-threadedge.h"
+#include "tree-ssa-loop.h"
 
 /* To avoid code explosion due to jump threading, we limit the
    number of statements we are going to copy.  This variable
@@ -617,6 +618,7 @@
      rather than use a relational operator.  These are simpler to handle.  */
   if (TREE_CODE (cond) == SSA_NAME)
     {
+      tree original_lhs = cond;
       cached_lhs = cond;
 
       /* Get the variable's current value from the equivalence chains.
@@ -638,6 +640,12 @@
 	 pass specific callback to try and simplify it further.  */
       if (cached_lhs && ! is_gimple_min_invariant (cached_lhs))
         cached_lhs = (*simplify) (stmt, stmt);
+
+      /* We couldn't find an invariant.  But, callers of this
+	 function may be able to do something useful with the
+	 unmodified destination.  */
+      if (!cached_lhs)
+	cached_lhs = original_lhs;
     }
   else
     cached_lhs = NULL;
@@ -897,6 +905,259 @@
   return false;
 }
 
+/* Return true if the CFG contains at least one path from START_BB to END_BB.
+   When a path is found, record in PATH the blocks from END_BB to START_BB.
+   VISITED_BBS is used to make sure we don't fall into an infinite loop.  Bound
+   the recursion to basic blocks belonging to LOOP.  */
+
+static bool
+fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
+		      vec<basic_block, va_gc> *&path,
+		      pointer_set_t *visited_bbs, loop_p loop)
+{
+  if (loop != start_bb->loop_father)
+    return false;
+
+  if (start_bb == end_bb)
+    {
+      vec_safe_push (path, start_bb);
+      return true;
+    }
+
+  if (!pointer_set_insert (visited_bbs, start_bb))
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, start_bb->succs)
+	if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop))
+	  {
+	    vec_safe_push (path, start_bb);
+	    return true;
+	  }
+    }
+
+  return false;
+}
+
+static int max_threaded_paths;
+
+/* We trace the value of the variable EXPR back through any phi nodes looking
+   for places where it gets a constant value and save the path.  Stop after
+   having recorded MAX_PATHS jump threading paths.  */
+
+static void
+fsm_find_control_statement_thread_paths (tree expr,
+					 pointer_set_t *visited_bbs,
+					 vec<basic_block, va_gc> *&path,
+					 bool seen_loop_phi)
+{
+  tree var = SSA_NAME_VAR (expr);
+  gimple def_stmt = SSA_NAME_DEF_STMT (expr);
+  basic_block var_bb = gimple_bb (def_stmt);
+
+  if (var == NULL || var_bb == NULL)
+    return;
+
+  /* For the moment we assume that an SSA chain only contains phi nodes, and
+     eventually one of the phi arguments will be an integer constant.  In the
+     future, this could be extended to also handle simple assignments of
+     arithmetic operations.  */
+  if (gimple_code (def_stmt) != GIMPLE_PHI)
+    return;
+
+  /* Avoid infinite recursion.  */
+  if (pointer_set_insert (visited_bbs, var_bb))
+    return;
+
+  int next_path_length = 0;
+  basic_block last_bb_in_path = path->last ();
+
+  if (loop_containing_stmt (def_stmt)->header == gimple_bb (def_stmt))
+    {
+      /* Do not walk through more than one loop PHI node.  */
+      if (seen_loop_phi)
+	return;
+      seen_loop_phi = true;
+    }
+
+  /* Following the chain of SSA_NAME definitions, we jumped from a definition in
+     LAST_BB_IN_PATH to a definition in VAR_BB.  When these basic blocks are
+     different, append to PATH the blocks from LAST_BB_IN_PATH to VAR_BB.  */
+  if (var_bb != last_bb_in_path)
+    {
+      edge e;
+      int e_count = 0;
+      edge_iterator ei;
+      vec<basic_block, va_gc> *next_path;
+      vec_alloc (next_path, n_basic_blocks_for_fn (cfun));
+
+      FOR_EACH_EDGE (e, ei, last_bb_in_path->preds)
+	{
+	  pointer_set_t *visited_bbs = pointer_set_create ();
+
+	  if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs,
+				    e->src->loop_father))
+	    ++e_count;
+
+	  pointer_set_destroy (visited_bbs);
+
+	  /* If there is more than one path, stop.  */
+	  if (e_count > 1)
+	    {
+	      vec_free (next_path);
+	      return;
+	    }
+	}
+
+      /* Stop if we have not found a path: this could occur when the recursion
+	 is stopped by one of the bounds.  */
+      if (e_count == 0)
+	{
+	  vec_free (next_path);
+	  return;
+	}
+
+      /* Append all the nodes from NEXT_PATH to PATH.  */
+      vec_safe_splice (path, next_path);
+      next_path_length = next_path->length ();
+      vec_free (next_path);
+    }
+
+  gcc_assert (path->last () == var_bb);
+
+  /* Iterate over the arguments of PHI.  */
+  unsigned int i;
+  for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
+    {
+      tree arg = gimple_phi_arg_def (def_stmt, i);
+      basic_block bbi = gimple_phi_arg_edge (def_stmt, i)->src;
+
+      /* Skip edges pointing outside the current loop.  */
+      if (!arg || var_bb->loop_father != bbi->loop_father)
+	continue;
+
+      if (TREE_CODE (arg) == SSA_NAME)
+	{
+	  vec_safe_push (path, bbi);
+	  /* Recursively follow SSA_NAMEs looking for a constant definition.  */
+	  fsm_find_control_statement_thread_paths (arg, visited_bbs, path,
+						   seen_loop_phi);
+
+	  path->pop ();
+	  continue;
+	}
+
+      if (TREE_CODE (arg) != INTEGER_CST)
+	continue;
+
+      int path_length = path->length ();
+      /* A path with less than 2 basic blocks should not be jump-threaded.  */
+      if (path_length < 2)
+	continue;
+
+      if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of basic blocks on the path "
+		     "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
+	  continue;
+	}
+
+      if (max_threaded_paths <= 0)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of previously recorded FSM paths to thread "
+		     "exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
+	  continue;
+	}
+
+      /* Add BBI to the path.  */
+      vec_safe_push (path, bbi);
+      ++path_length;
+
+      int n_insns = 0;
+      gimple_stmt_iterator gsi;
+      int j;
+      loop_p loop = (*path)[0]->loop_father;
+      bool path_crosses_loops = false;
+
+      /* Count the number of instructions on the path: as these instructions
+	 will have to be duplicated, we will not record the path if there are
+	 too many instructions on the path.  Also check that all the blocks in
+	 the path belong to a single loop.  */
+      for (j = 1; j < path_length - 1; j++)
+	{
+	  basic_block bb = (*path)[j];
+
+	  if (bb->loop_father != loop)
+	    {
+	      path_crosses_loops = true;
+	      break;
+	    }
+
+	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	    {
+	      gimple stmt = gsi_stmt (gsi);
+	      /* Do not count empty statements and labels.  */
+	      if (gimple_code (stmt) != GIMPLE_NOP
+		  && gimple_code (stmt) != GIMPLE_LABEL
+		  && !is_gimple_debug (stmt))
+		++n_insns;
+	    }
+	}
+
+      if (path_crosses_loops)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the path crosses loops.\n");
+	  path->pop ();
+	  continue;
+	}
+
+      if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of instructions on the path "
+		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
+	  path->pop ();
+	  continue;
+	}
+
+      vec<jump_thread_edge *> *jump_thread_path
+	= new vec<jump_thread_edge *> ();
+
+      /* Record the edges between the blocks in PATH.  */
+      for (j = 0; j < path_length - 1; j++)
+	{
+	  edge e = find_edge ((*path)[path_length - j - 1],
+			      (*path)[path_length - j - 2]);
+	  gcc_assert (e);
+	  jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
+	  jump_thread_path->safe_push (x);
+	}
+
+      /* Add the edge taken when the control variable has value ARG.  */
+      edge taken_edge = find_taken_edge ((*path)[0], arg);
+      jump_thread_edge *x
+	= new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
+      jump_thread_path->safe_push (x);
+
+      register_jump_thread (jump_thread_path);
+      --max_threaded_paths;
+
+      /* Remove BBI from the path.  */
+      path->pop ();
+    }
+
+  /* Remove all the nodes that we added from NEXT_PATH.  */
+  if (next_path_length)
+    vec_safe_truncate (path, (path->length () - next_path_length));
+}
+
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -982,7 +1243,10 @@
       cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify,
 					      handle_dominating_asserts);
 
-      if (cond && is_gimple_min_invariant (cond))
+      if (!cond)
+	return 0;
+
+      if (is_gimple_min_invariant (cond))
 	{
 	  edge taken_edge = find_taken_edge (e->dest, cond);
 	  basic_block dest = (taken_edge ? taken_edge->dest : NULL);
@@ -1028,6 +1292,28 @@
 				      backedge_seen_p);
 	  return 1;
 	}
+
+      if (!flag_expensive_optimizations
+	  || optimize_function_for_size_p (cfun)
+	  || TREE_CODE (cond) != SSA_NAME
+	  || e->dest->loop_father != e->src->loop_father
+	  || loop_depth (e->dest->loop_father) == 0)
+	return 0;
+
+      /* When COND cannot be simplified, try to find paths from a control
+	 statement back through the PHI nodes which would affect that control
+	 statement.  */
+      vec<basic_block, va_gc> *bb_path;
+      vec_alloc (bb_path, n_basic_blocks_for_fn (cfun));
+      vec_safe_push (bb_path, e->dest);
+      pointer_set_t *visited_bbs = pointer_set_create ();
+
+      max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
+      fsm_find_control_statement_thread_paths (cond, visited_bbs, bb_path,
+					       false);
+
+      pointer_set_destroy (visited_bbs);
+      vec_free (bb_path);
     }
   return 0;
 }
--- a/src/gcc/convert.c
+++ b/src/gcc/convert.c
@@ -471,8 +471,8 @@
 	  break;
 
 	CASE_FLT_FN (BUILT_IN_ROUND):
-	  /* Only convert in ISO C99 mode.  */
-	  if (!targetm.libc_has_function (function_c99_misc))
+	  /* Only convert in ISO C99 mode and with -fno-math-errno.  */
+	  if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math)
 	    break;
 	  if (outprec < TYPE_PRECISION (integer_type_node)
 	      || (outprec == TYPE_PRECISION (integer_type_node)
@@ -492,8 +492,8 @@
 	    break;
 	  /* ... Fall through ...  */
 	CASE_FLT_FN (BUILT_IN_RINT):
-	  /* Only convert in ISO C99 mode.  */
-	  if (!targetm.libc_has_function (function_c99_misc))
+	  /* Only convert in ISO C99 mode and with -fno-math-errno.  */
+	  if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math)
 	    break;
 	  if (outprec < TYPE_PRECISION (integer_type_node)
 	      || (outprec == TYPE_PRECISION (integer_type_node)
--- a/src/libobjc/ChangeLog.linaro
+++ b/src/libobjc/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libvtv/ChangeLog.linaro
+++ b/src/libvtv/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libgfortran/configure
+++ b/src/libgfortran/configure
@@ -25950,7 +25950,7 @@
 # test is copied from libgomp, and modified to not link in -lrt as
 # libgfortran calls clock_gettime via a weak reference if it's found
 # in librt.
-if test $ac_cv_func_clock_gettime = no; then
+if test "$ac_cv_func_clock_gettime" = no; then
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5
 $as_echo_n "checking for clock_gettime in -lrt... " >&6; }
 if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then :
--- a/src/libgfortran/configure.ac
+++ b/src/libgfortran/configure.ac
@@ -518,7 +518,7 @@
 # test is copied from libgomp, and modified to not link in -lrt as
 # libgfortran calls clock_gettime via a weak reference if it's found
 # in librt.
-if test $ac_cv_func_clock_gettime = no; then
+if test "$ac_cv_func_clock_gettime" = no; then
   AC_CHECK_LIB(rt, clock_gettime,
     [AC_DEFINE(HAVE_CLOCK_GETTIME_LIBRT, 1,
                [Define to 1 if you have the `clock_gettime' function in librt.])])
--- a/src/libgfortran/ChangeLog.linaro
+++ b/src/libgfortran/ChangeLog.linaro
@@ -0,0 +1,83 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	Backport from trunk r209747.
+	2014-04-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* configure.ac: Quote usage of ac_cv_func_clock_gettime in if test.
+	* configure: Regenerate.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libada/ChangeLog.linaro
+++ b/src/libada/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libffi/ChangeLog.linaro
+++ b/src/libffi/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libssp/ChangeLog.linaro
+++ b/src/libssp/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libcilkrts/ChangeLog.linaro
+++ b/src/libcilkrts/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libcpp/ChangeLog.linaro
+++ b/src/libcpp/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/libcpp/po/ChangeLog.linaro
+++ b/src/libcpp/po/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
--- a/src/fixincludes/ChangeLog.linaro
+++ b/src/fixincludes/ChangeLog.linaro
@@ -0,0 +1,75 @@
+2015-11-02  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.10 snapshot.
+
+2015-08-05  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06-1 snapshot.
+
+2015-06-30  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.06 released.
+
+2015-04-16  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.9-2015.04 snapshot.
+
+2015-03-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.03 released.
+
+2015-02-12  Michael Collison  <michael.collison@linaro.org>
+
+        GCC Linaro 4.9-2015.02 released.
+
+2015-01-15  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2015.01 released.
+
+2014-12-11  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.12 released.
+
+2014-11-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.11 released.
+
+2014-10-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10-1 released.
+
+2014-10-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.10 released.
+
+2014-09-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.09 released.
+
+2014-08-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.08 released.
+
+2014-07-24  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07-1 released.
+
+2014-07-17  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.07 released.
+
+2014-06-25  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06-1 released.
+
+2014-06-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.06 released.
+
+2014-05-14  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.05 released.
+
+2014-04-22  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.9-2014.04 released.
gcc-4.9-source 4.9.3-13ubuntu2 / usr / src / gcc-4.9 / debian / patches / gcc-linaro.diff