/usr/src/gcc-4.6/debian/patches/gcc-linaro.diff

# DP: Changes for the Linaro 4.6-2013.04 release.

--- a/src/ChangeLog.linaro
+++ b/src/ChangeLog.linaro
@@ -0,0 +1,3956 @@
+2013-04-08  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.6-2013.04 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2013-04-05  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 197511)
+
+2013-04-04  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 197470)
+
+2013-04-02  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 197313)
+
+2013-03-11  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2013-03-11  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	GCC Linaro 4.6-2013.03 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2013-02-25  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 196247)
+
+2013-02-08  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	gcc/
+	* LINARO: Bump version.
+
+2013-02-08  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	GCC Linaro 4.6-2013.02 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2013-02-05  Yvan Roux  <yvan.roux@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 195744).
+
+2013-01-15  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2013-01-15  Zhenqiang Chen  <zhenqiang.chen@linaro.org>
+
+	GCC Linaro 4.6-2013.01 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2013-01-02  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 194771).
+
+2012-12-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-12-12  Yvan Roux  <yvan.roux@linaro.org>
+
+	GCC Linaro 4.6-2012.12 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-12-10  Yvan Roux  <yvan.roux@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 194340).
+
+2012-11-13  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-11-12  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	GCC Linaro 4.6-2012.11 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-11-07  Michael Hope  <michael.hope@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 193199).
+
+2012-10-08  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-10-08  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	GCC Linaro 4.6-2012.10 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-10-01  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 191880)
+
+2012-09-18  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	LP 1029454
+	Backport from mainline r183524:
+
+	gcc/
+	2012-01-25  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/51987
+	* tree-data-ref.c (get_references_in_stmt): Handle references in
+	non-volatile GIMPLE_ASM.
+
+	gcc/testsuite/
+	2012-01-25  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/51987
+	* gcc.target/i386/pr51987.c: New test.
+
+2012-09-12  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-09-12  Michael Hope  <michael.hope@linaro.org>
+
+	GCC Linaro 4.6-2012.09 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-09-11  Michael Hope  <michael.hope@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 191000).
+
+2012-08-13  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-08-13  Matthew Gretton-Dann  <matthew.gretton-dann@linaro.org>
+
+	GCC Linaro 4.6-2012.08 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-08-10  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	Backport from mainline:
+
+	gcc/
+	2012-07-30  Ulrich Weigand  <ulrich.weigand@linaro.org>
+		    Richard Earnshaw  <rearnsha@arm.com>
+
+	* target.def (vector_alignment): New target hook.
+	* doc/tm.texi.in (TARGET_VECTOR_ALIGNMENT): Document new hook.
+	* doc/tm.texi: Regenerate.
+	* targhooks.c (default_vector_alignment): New function.
+	* targhooks.h (default_vector_alignment): Add prototype.
+	* stor-layout.c (layout_type): Use targetm.vector_alignment.
+	* config/arm/arm.c (arm_vector_alignment): New function.
+	(TARGET_VECTOR_ALIGNMENT): Define.
+
+	* tree-vect-data-refs.c (vect_update_misalignment_for_peel): Use
+	vector type alignment instead of size.
+	* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Use
+	element type size directly instead of computing it from alignment.
+	Fix variable naming and comment.
+
+	gcc/testsuite/
+	2012-07-30  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	* lib/target-supports.exp
+	(check_effective_target_vect_natural_alignment): New function.
+	* gcc.dg/align-2.c: Only run on targets with natural alignment
+	of vector types.
+	* gcc.dg/vect/slp-25.c: Adjust tests for targets without natural
+	alignment of vector types.
+
+	2011-12-21  Michael Zolotukhin  <michael.v.zolotukhin@intel.com>
+
+	* gcc.dg/vect/vect-peel-1.c: Adjust test diag-scans to fix fail on AVX.
+	* gcc.dg/vect/vect-peel-2.c: Ditto.
+
+	2011-06-21  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR testsuite/49443
+	* gcc.dg/vect/vect-peel-3.c: Expect to fail on vect_no_align
+	targets.
+	* gcc.dg/vect/vect-peel-4.c: Likewise.
+
+	2011-06-14  Ira Rosen  <ira.rosen@linaro.org>
+
+	* gcc.dg/vect/vect-peel-3.c: Adjust misalignment values
+	for double-word vectors.
+	* gcc.dg/vect/vect-peel-4.c: Likewise.
+
+2012-08-01  Michael Hope  <michael.hope@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 189991).
+
+2012-07-02  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-07-02  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	GCC Linaro 4.6-2012.07 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-07-01  Michael Hope  <michael.hope@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 189058).
+
+2012-06-29  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	LP 1010826
+
+	Backport from mainline:
+
+	gcc/
+	PR tree-optimization/53729
+	PR tree-optimization/53636
+	* tree-vect-slp.c (vect_slp_analyze_bb_1): Delay call to
+	vect_verify_datarefs_alignment until after statements have
+	been marked as relevant/irrelevant.
+	* tree-vect-data-refs.c (vect_verify_datarefs_alignment):
+	Skip irrelevant statements.
+	(vect_enhance_data_refs_alignment): Use STMT_VINFO_RELEVANT_P
+	instead of STMT_VINFO_RELEVANT.
+	(vect_get_data_access_cost): Do not check for supportable
+	alignment before calling vect_get_load_cost/vect_get_store_cost.
+	* tree-vect-stmts.c (vect_get_store_cost): Do not abort when
+	handling unsupported alignment.
+	(vect_get_load_cost): Likewise.
+
+	gcc/
+	PR tree-optimization/53636
+	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Verify
+	stride when doing basic-block vectorization.
+
+	gcc/testsuite/
+	PR tree-optimization/53636
+	* gcc.target/arm/pr53636.c: New test.
+
+2012-06-28  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (neon_dereference_pointer): Fixup typos.
+
+	gcc/testsuite/
+	* gcc.target/arm/lp101329.c: Remove unneeded comment.
+
+2012-06-21  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP 1013209
+	gcc/
+	* config/arm/arm.c (neon_dereference_pointer): Use fold_convert
+	to convert expression to the right type. This is a Linaro 4.6 only
+	patch as this was originally backported from FSF 4.7 and hence applies
+	only here.
+
+	gcc/testsuite/
+	* gcc.target/arm/lp101329.c: New test.
+
+2012-06-14  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-06-12  Michael Hope  <michael.hope@linaro.org>
+
+	GCC Linaro 4.6-2012.06 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-06-11  Michael Hope  <michael.hope@linaro.org>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 188320).
+
+2012-05-22  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline:
+	gcc/
+	2012-03-15  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	* config.gcc (target_type_format_char): New. Document it. Set it for
+        arm*-*-* .
+
+2012-05-23  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP:990530
+	gcc/
+        2012-03-12  Richard Guenther  <rguenther@suse.de>
+	* config/arm/arm.c (neon_dereference_pointer): Do not call
+	covert during RTL expansion.
+
+2012-05-21  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r186859:
+
+	gcc/
+	2012-04-26  Michael Hope  <michael.hope@linaro.org>
+		    Richard Earnshaw  <rearnsha@arm.com>
+
+	* config/arm/linux-eabi.h (GLIBC_DYNAMIC_LINKER_SOFT_FLOAT): Define.
+	(GLIBC_DYNAMIC_LINKER_HARD_FLOAT): Define.
+	(GLIBC_DYNAMIC_LINKER_DEFAULT): Define.
+	(GLIBC_DYNAMIC_LINKER):	Redefine to use the hard float path.
+
+	Backport from mainline r187012:
+
+	gcc/
+	2012-05-01  Richard Earnshaw  <rearnsha@arm.com>
+
+	* arm/linux-eabi.h (GLIBC_DYNAMIC_LINKER_DEFAULT): Avoid ifdef
+	comparing enumeration values.  Update comments.
+
+2012-05-15  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-05-15  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2012.05 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-05-08  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 187273).
+
+2012-05-08  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	LP 959242
+
+	Backport from mainline:
+
+	2012-05-04  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	gcc/
+	PR tree-optimization/52633
+	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Swap order of
+	vect_recog_widen_shift_pattern and vect_recog_over_widening_pattern.
+	(vect_recog_over_widening_pattern): Remove handling of code that was
+	already detected as over-widening pattern.  Remove special handling
+	of "unsigned" cases.  Instead, support general case of conversion
+	of the shift result to another type.
+
+	gcc/testsuite/
+	PR tree-optimization/52633
+	* gcc.target/arm/pr52633.c: New test.
+
+	gcc/
+	* tree-vect-patterns.c (vect_single_imm_use): New function.
+	(vect_recog_widen_mult_pattern): Use it instead of open-coding loop.
+	(vect_recog_over_widening_pattern): Likewise.
+	(vect_recog_widen_shift_pattern): Likewise.
+
+	gcc/
+	* tree-vect-patterns.c (vect_same_loop_or_bb_p): New function.
+	(vect_handle_widen_op_by_const): Use it instead of open-coding test.
+	(vect_recog_widen_mult_pattern): Likewise.
+	(vect_operation_fits_smaller_type): Likewise.
+	(vect_recog_over_widening_pattern): Likewise.
+	(vect_recog_widen_shift_pattern): Add to vect_same_loop_or_bb_p test.
+
+2012-04-16  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	LP 972648
+
+	Backport from mainline:
+
+	2011-08-23  Richard Guenther  <rguenther@suse.de>
+
+	gcc/
+	* Makefile.in (tree-data-ref.o): Add tree-affine.h dependency.
+	* tree-affine.h (aff_comb_cannot_overlap_p): Declare.
+	* tree-affine.c (aff_comb_cannot_overlap_p): New function, moved
+	from ...
+	* tree-ssa-loop-im.c (cannot_overlap_p): ... here.
+	(mem_refs_may_alias_p): Adjust.
+	* tree-data-ref.h (dr_may_alias_p): Adjust.
+	* tree-data-ref.c: Include tree-affine.h.
+	(dr_analyze_indices): Do nothing for the non-loop case.
+	(dr_may_alias_p): Distinguish loop and non-loop case.  Disambiguate
+	more cases in the non-loop case.
+	* graphite-sese-to-poly.c (write_alias_graph_to_ascii_dimacs): Adjust
+	calls to dr_may_alias_p.
+	(write_alias_graph_to_ascii_ecc): Likewise.
+	(write_alias_graph_to_ascii_dot): Likewise.
+	(build_alias_set_optimal_p): Likewise.
+
+2012-04-13  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	LP 968766
+
+	Backport from mainline:
+
+	gcc/
+	PR tree-optimization/52870
+	* tree-vect-patterns.c (vect_recog_widen_mult_pattern): Verify that
+	presumed pattern statement is within the same loop or basic block.
+
+	gcc/testsuite/
+	PR tree-optimization/52870
+	* gcc.dg/vect/pr52870.c: New test.
+
+2012-04-10  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-04-10  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2012.04 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-04-02  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.3 (svn branches/gcc-4_6-branch 186060).
+
+2012-03-26  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	LP 960283
+	LP 960274
+	LP 960817
+
+	Backport from mainline:
+
+	gcc/
+	PR tree-optimization/52686
+	* tree-vect-data-refs.c (vect_get_smallest_scalar_type): Handle
+	WIDEN_LSHIFT_EXPR.
+
+	gcc/testsuite/
+	PR tree-optimization/52686
+	* gcc.target/arm/pr52686.c: New test.
+
+2012-03-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-03-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2012.03 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-03-08  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline.
+        2012-02-28  Richard Earnshaw  <rearnsha@arm.com>
+
+	* arm.c (aapcs_vfp_is_call_or_return_candidate): Only use the machine
+	mode if there is no type information available.
+
+        2012-02-28  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* gcc.target/arm/aapcs/vfp1.c (dg_do run): Run on all eabi variants.
+	* gcc.target/arm/aapcs/vfp2.c: Likewise.
+	* gcc.target/arm/aapcs/vfp3.c: Likewise.
+	* gcc.target/arm/aapcs/vfp4.c: Likewise.
+	* gcc.target/arm/aapcs/vfp5.c: Likewise.
+	* gcc.target/arm/aapcs/vfp6.c: Likewise.
+	* gcc.target/arm/aapcs/vfp7.c: Likewise.
+	* gcc.target/arm/aapcs/vfp8.c: Likewise.
+	* gcc.target/arm/aapcs/vfp9.c: Likewise.
+	* gcc.target/arm/aapcs/vfp10.c: Likewise.
+	* gcc.target/arm/aapcs/vfp11.c: Likewise.
+	* gcc.target/arm/aapcs/vfp12.c: Likewise.
+	* gcc.target/arm/aapcs/vfp13.c: Likewise.
+	* gcc.target/arm/aapcs/vfp14.c: Likewise.
+	* gcc.target/arm/aapcs/vfp15.c: Likewise.
+	* gcc.target/arm/aapcs/vfp16.c: Likewise.
+	* gcc.target/arm/aapcs/vfp17.c: Likewise.
+	* gcc.target/arm/neon-constants.h: New file.
+	* gcc.target/arm/aapcs/neon-constants.h: New file.
+	* gcc.target/arm/aapcs/neon-vect1.c: New test.
+	* gcc.target/arm/aapcs/neon-vect2.c: New test.
+	* gcc.target/arm/aapcs/neon-vect3.c: New test.
+	* gcc.target/arm/aapcs/neon-vect4.c: New test.
+	* gcc.target/arm/aapcs/neon-vect5.c: New test.
+	* gcc.target/arm/aapcs/neon-vect6.c: New test.
+	* gcc.target/arm/aapcs/neon-vect7.c: New test.
+	* gcc.target/arm/aapcs/neon-vect8.c: New test.
+
+2012-03-08  Michael Hope  <michael.hope@linaro.org>
+
+	Backport proposed patch:
+
+	gcc/
+	2012-01-31  Richard Henderson  <rth@redhat.com>
+
+	* longlong.h [arm] (umul_ppmm): Use umull.  Enable for thumb2.
+	[arm] (count_trailing_zeros): Use __builtin_ctz.
+
+2012-03-06  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	Backport from mainline:
+
+	gcc/
+	* config/arm/arm.c (arm_sat_operator_match): New function.
+	* config/arm/arm-protos.h (arm_sat_operator_match): Add prototype.
+	* config/arm/arm.md ("insn" attribute): Add "sat" value.
+	("SAT", "SATrev"): New code iterators.
+	("SATlo", "SAThi"): New code iterator attributes.
+	("*satsi_<SAT:code>"): New pattern.
+	("*satsi_<SAT:code>_shift"): Likewise.
+	* config/arm/predicates.md (sat_shift_operator): New.
+
+	gcc/testsuite/
+	* gcc.target/arm/sat-1.c: New test.
+
+2012-03-06  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP:942307
+    	gcc/
+    	PR target/50305
+    	* config/arm/arm.c (arm_legitimize_reload_address): Recognize
+    	output of a previous pass through legitimize_reload_address.
+    	Do not attempt to optimize addresses if the base register is
+    	equivalent to a constant.
+    	gcc/testsuite/
+    	PR target/50305
+    	* gcc.target/arm/pr50305.c: New test.
+
+2012-03-02  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.3 Release (svn branches/gcc-4_6-branch 108680).
+
+2012-02-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline.
+	gcc/
+        2012-02-21  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+
+        Revert r183011
+        * config/arm/arm-cores.def (cortex-a15): Use generic Cortex tuning
+        parameters.
+        * config/arm/arm.c (arm_cortex_a15_tune): Remove.
+
+
+2012-02-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP:#922474
+	gcc/
+	* config/arm/sync.md (sync_lock_releasedi): Define.
+	(arm_sync_lock_releasedi): Likewise.
+	gcc/testsuite
+	Backport from mainline.
+        2012-01-30  Greta Yorsh  <Greta.Yorsh@arm.com>
+	* gcc.target/arm/di-longlong64-sync-withldrexd.c: Accept
+	new code generated for __sync_lock_release.
+
+2012-02-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+        2011-12-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (vfp3_const_double_for_fract_bits): Define.
+	* config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare.
+	* config/arm/constraints.md ("Dt"): New constraint.
+	* config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal):
+	New.
+	* config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New.
+	(*arm_combine_vcvt_f32_u32): New.
+
+	LP:#900426
+
+	2011-12-06  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+        * config/arm/vfp.md (*combine_vcvt_f64_<FCVTI32typename>): Fix
+       formatting character for vmov.f64 case.
+
+2012-02-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (arm_print_operand): Remove wrongly merged code.
+	 (vfp3_const_double_for_fract_bits): Likewise.
+
+2012-02-20  Andrew Stubbs  <ams@codesourcery.com>
+
+	LP:#936863
+	gcc/
+	* config/arm/arm.c (arm_print_operand): Avoid null-pointer
+	dereference from MEM_SIZE.
+
+2012-02-08  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (arm_option_optimization_table): Enable
+	-fsched-pressure using -fsched-pressure-algorithm=model by
+	default when optimizing.
+
+2012-02-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* sched-deps.c (fixup_sched_groups): Rename to...
+	(chain_to_prev_insn): ...this.
+	(chain_to_prev_insn_p): New function.
+	(deps_analyze_insn): Use it instead of SCHED_GROUP_P.
+
+2012-02-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* sched-int.h (_haifa_insn_data): Move priority_status.
+	Add model_index.
+	(INSN_MODEL_INDEX): New macro.
+	* haifa-sched.c (insn_delay): New function.
+	(sched_regno_pressure_class): Update commentary.
+	(mark_regno_birth_or_death): Pass the liveness bitmap and
+	pressure array as arguments, instead of using curr_reg_live and
+	curr_reg_pressure.  Only update the pressure if the bit in the
+	liveness set has changed.
+	(initiate_reg_pressure_info): Always trust the live-in set for
+	SCHED_PRESSURE_MODEL.
+	(initiate_bb_reg_pressure_info): Update call to
+	mark_regno_birth_or_death.
+	(dep_list_size): Take the list as argument.
+	(calculate_reg_deaths): New function, extracted from...
+	(setup_insn_reg_pressure_info): ...here.
+	(MODEL_BAR): New macro.
+	(model_pressure_data, model_insn_info, model_pressure_limit)
+	(model_pressure_group): New structures.
+	(model_schedule, model_worklist, model_insns, model_num_insns)
+	(model_curr_point, model_before_pressure, model_next_priority):
+	New variables.
+	(MODEL_PRESSURE_DATA, MODEL_MAX_PRESSURE, MODEL_REF_PRESSURE)
+	(MODEL_INSN_INFO, MODEL_INSN): New macros.
+	(model_index, model_update_limit_points_in_group): New functions.
+	(model_update_limit_points, model_last_use_except): Likewise.
+	(model_start_update_pressure, model_update_pressure): Likewise.
+	(model_recompute, model_spill_cost, model_excess_group_cost): Likewise.
+	(model_excess_cost, model_dump_pressure_points): Likewise.
+	(model_set_excess_costs): Likewise.
+	(rank_for_schedule): Extend SCHED_PRIORITY_WEIGHTED ordering to
+	SCHED_PRIORITY_MODEL.  Use insn_delay.  Use the order in the model
+	schedule as an alternative tie-breaker.  Update the call to
+	dep_list_size.
+	(ready_sort): Call model_set_excess_costs.
+	(update_register_pressure): Update call to mark_regno_birth_or_death.
+	Rely on that function to check liveness rather than doing it here.
+	(model_classify_pressure, model_order_p, model_add_to_worklist_at)
+	(model_remove_from_worklist, model_add_to_worklist, model_promote_insn)
+	(model_add_to_schedule, model_analyze_insns, model_init_pressure_group)
+	(model_record_pressure, model_record_pressures): New functions.
+	(model_record_final_pressures, model_add_successors_to_worklist)
+	(model_promote_predecessors, model_choose_insn): Likewise.
+	(model_reset_queue_indices, model_dump_pressure_summary): Likewise.
+	(model_start_schedule, model_finalize_pressure_group): Likewise.
+	(model_end_schedule): Likewise.
+	(schedule_insn): Say when we're scheduling the next instruction
+	in the model schedule.
+	(schedule_insn): Handle SCHED_PRESSURE_MODEL.
+	(queue_to_ready): Do not add instructions that are
+	MAX_SCHED_READY_INSNS beyond the current point of the model schedule.
+	Always allow the next instruction in the model schedule to be added.
+	(debug_ready_list): Print the INSN_REG_PRESSURE_EXCESS_COST_CHANGE
+	and delay for SCHED_PRESSURE_MODEL too.
+	(prune_ready_list): Extend SCHED_PRIORITY_WEIGHTED handling to
+	SCHED_PRIORITY_MODEL, but also take the DFA into account.
+	(schedule_block): Call model_start_schedule and model_end_schedule.
+	Extend SCHED_PRIORITY_WEIGHTED stall handling to SCHED_PRIORITY_MODEL.
+	(sched_init): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling
+	to SCHED_PRESSURE_MODEL, but don't allocate saved_reg_live or
+	region_ref_regs.
+	(sched_finish): Update accordingly.
+	(fix_tick_ready): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling
+	to SCHED_PRESSURE_MODEL.
+	(add_jump_dependencies): Update call to dep_list_size.
+	(haifa_finish_h_i_d): Fix leak of max_reg_pressure.
+	(haifa_init_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE handling
+	to SCHED_PRESSURE_MODEL.
+	* sched-deps.c (init_insn_reg_pressure_info): Likewise, but don't
+	allocate INSN_MAX_REG_PRESSURE for SCHED_PRESSURE_MODEL.
+	(sched_analyze_insn): Extend INSN_REG_PRESSURE_EXCESS_COST_CHANGE
+	handling to SCHED_PRESSURE_MODEL.
+
+2012-02-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* common.opt (fsched-pressure-algorithm=): New option.
+	* flag-types.h (sched_pressure_algorithm): New enum.
+	* sched-int.h (sched_pressure_p): Replace with...
+	(sched_pressure): ...this new variable.
+	* haifa-sched.c (sched_pressure_p): Replace with...
+	(sched_pressure): ...this new variable.
+	(sched_regno_pressure_class, rank_for_schedule, ready_sort)
+	(update_reg_and_insn_max_reg_pressure, schedule_insn)
+	(debug_ready_list, schedule_block, sched_init, sched_finish)
+	(fix_tick_ready, haifa_init_insn): Update accordingly.
+	* sched-deps.c (init_insn_reg_pressure_info): Likewise.
+	* sched-rgn.c (schedule_region): Likewise.
+
+2012-02-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-01  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* haifa-sched.c (prune_ready_list): New function, broken out of
+	schedule_block.
+	(schedule_block): Use it.
+
+2012-02-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-02-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2012.02 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-02-01  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.2 (svn branches/gcc-4_6-branch 183786).
+
+2012-01-20  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline
+        2012-01-20  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	PR target/51819
+	* config/arm/arm.c (arm_print_operand): Correct output of alignment
+	hints for neon loads and stores.
+
+2012-01-16  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r181210:
+
+	gcc/
+	2011-11-07  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+
+	* config/arm/arm-cores.def: Add -mcpu=cortex-a7.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Likewise.
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Add Cortex A-7.
+	* doc/invoke.texi: Document -mcpu=cortex-a7.
+
+2012-01-16  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r182561:
+
+	2011-12-20  Richard Henderson  <rth@redhat.com>
+
+	gcc/
+	* config/arm/arm.md (*arm_cmpdi_unsigned): Enable for thumb2.
+	* config/arm/arm.c (arm_select_cc_mode): Use it.
+
+2012-01-16  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r183011:
+
+	2012-01-09  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+
+	* config/arm/arm-cores.def (cortex-a15): Use cortex_a15_tune for
+	tuning parameters.
+	* config/arm/arm.c (arm_cortex_a15_tune): New static variable.
+
+2012-01-18  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r183126:
+
+	2012-01-12  Ira Rosen  <irar@il.ibm.com>
+
+	gcc/
+	PR tree-optimization/51799
+	* tree-vect-patterns.c (vect_recog_over_widening_pattern): Check
+	that the last operation is a type demotion.
+
+	gcc/testsuite/
+	* gcc.dg/vect/pr51799.c: New test.
+	* gcc.dg/vect/vect-widen-shift-u8.c: Expect two widening shift
+	patterns.
+
+2012-01-12  Ulrich Weigand  <ulrich.weigand@linaro.org>
+
+	LP 879725
+	Backport from mainline:
+
+	2012-01-02  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	* ddg.c (def_has_ccmode_p): New function.
+	(add_cross_iteration_register_deps,
+	create_ddg_dep_from_intra_loop_link): Call it.
+
+	gcc/testsuite/
+	* gcc.dg/sms-11.c: New file.
+
+2012-01-11  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2012-01-11  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2012.01 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2012-01-06  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from mainline:
+
+	2012-01-06  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/testsuite/
+	* gcc.target/arm/headmerge-2.c: Adjust scan pattern.
+
+2012-01-05  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.2 (svn branches/gcc-4_6-branch 182894).
+
+2012-01-05  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r182271:
+
+	2011-12-13  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	* modulo-sched.c (mark_loop_unsched): Free bbs.
+
+2011-12-30  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-10-12  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* expr.h (copy_blkmode_to_reg): Declare.
+	* expr.c (copy_blkmode_to_reg): New function.
+	(expand_assignment): Don't expand register RESULT_DECLs before
+	the lhs.  Use copy_blkmode_to_reg to copy BLKmode values into a
+	RESULT_DECL register.
+	(expand_expr_real_1): Handle BLKmode decls when looking for promotion.
+	* stmt.c (expand_return): Move BLKmode-to-register code into
+	copy_blkmode_to_reg.
+
+2011-12-20  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-11-29  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/51301
+	gcc/
+	* tree-vect-patterns.c (vect_recog_over_widening_pattern): Check that
+	the last statement doesn't convert to a bigger type than the original
+	type of the computation.
+
+	gcc/testsuite/
+	* gcc.dg/vect/pr51301.c: New test.
+
+2011-12-06  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-12-06  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.12 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-12-06  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Revert earlier commit for fixed to floating point conversions.
+	2011-12-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (vfp3_const_double_for_fract_bits): Define.
+	* config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare.
+	* config/arm/constraints.md ("Dt"): New constraint.
+	* config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal):
+	New.
+	* config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New.
+	(*arm_combine_vcvt_f32_u32): New.
+
+2011-12-01  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.2 (svn branches/gcc-4_6-branch 181866).
+
+2011-12-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline -A15 tuning.
+	2011-11-30  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+
+	* config/arm/arm.c (arm_issue_rate): Cortex-A15 can triple issue.
+	* config/arm/arm.md (mul64): New attribute.
+	(generic_sched): Cortex-A15 is not scheduled generically.
+	(cortex-a15.md): Include.
+	* config/arm/cortex-a15.md: New machine description.
+	* config/arm/t-arm (MD_INCLUDES): Add cortex-a15.md.
+
+        2011-11-30  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+	* config/arm/t-arm (MD_INCLUDES): Ensure all md files are listed.
+
+2011-12-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (vfp3_const_double_for_fract_bits): Define.
+	* config/arm/arm-protos.h (vfp3_const_double_for_fract_bits): Declare.
+	* config/arm/constraints.md ("Dt"): New constraint.
+	* config/arm/predicates.md (const_double_vcvt_power_of_two_reciprocal):
+	New.
+	* config/arm/vfp.md (*arm_combine_vcvt_f32_s32): New.
+	(*arm_combine_vcvt_f32_u32): New.
+
+2011-11-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	Needs to be merged upstream:
+
+	gcc/
+	* tree-vect-patterns.c (widened_name_p): Rename to ...
+	(type_conversion_p): ... this.  Add new argument to determine
+	if it's a promotion or demotion operation.  Check for
+	CONVERT_EXPR_CODE_P instead of NOP_EXPR.
+	(vect_recog_dot_prod_pattern): Call type_conversion_p instead
+	widened_name_p.
+	(vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern,
+	vect_operation_fits_smaller_type, vect_recog_widen_shift_pattern):
+	Likewise.
+	(vect_recog_mixed_size_cond_pattern): Likewise and allow
+	non-constant then and else clauses.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-cond-3.c: New test.
+	* gcc.dg/vect/bb-slp-cond-4.c: New test.
+
+2011-11-28  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	Backport from mainline (svn r19983):
+
+	2011-10-14  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	gcc/testsuite/
+	* gcc.dg/di-longlong64-sync-1.c: New test.
+	* gcc.dg/di-sync-multithread.c: New test.
+	* gcc.target/arm/di-longlong64-sync-withhelpers.c: New test.
+	* gcc.target/arm/di-longlong64-sync-withldrexd.c: New test.
+	* lib/target-supports.exp: (arm_arch_*_ok): Series of effective-target
+	tests for v5, v6, v6k, and v7-a, and add-options helpers.
+	(check_effective_target_arm_arm_ok): New helper.
+	(check_effective_target_sync_longlong): New helper.
+
+2011-11-28  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	Backport from mainline (svn r19982):
+
+	2011-10-14  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	gcc/
+	* config/arm/linux-atomic-64bit.c: New (based on linux-atomic.c).
+	* config/arm/linux-atomic.c: Change comment to point to 64bit version.
+	(SYNC_LOCK_RELEASE): Instantiate 64bit version.
+	* config/arm/t-linux-eabi: Pull in linux-atomic-64bit.c.
+
+2011-11-28  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	Backport from mainline (svn r19981):
+
+	2011-10-14  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (arm_output_ldrex): Support ldrexd.
+	(arm_output_strex): Support strexd.
+	(arm_output_it): New helper to output it in Thumb2 mode only.
+	(arm_output_sync_loop): Support DI mode.  Change comment to
+	not support const_int.
+	(arm_expand_sync): Support DI mode.
+	* config/arm/arm.h (TARGET_HAVE_LDREXBHD): Split into LDREXBH
+	and LDREXD.
+	* config/arm/iterators.md (NARROW): move from sync.md.
+	(QHSD): New iterator for all current ARM integer modes.
+	(SIDI): New iterator for SI and DI modes only.
+	* config/arm/sync.md (sync_predtab): New mode_attr.
+	(sync_compare_and_swapsi): Fold into sync_compare_and_swap<mode>.
+	(sync_lock_test_and_setsi): Fold into sync_lock_test_and_setsi<mode>.
+	(sync_<sync_optab>si): Fold into sync_<sync_optab><mode>.
+	(sync_nandsi): Fold into sync_nand<mode>.
+	(sync_new_<sync_optab>si): Fold into sync_new_<sync_optab><mode>.
+	(sync_new_nandsi): Fold into sync_new_nand<mode>.
+	(sync_old_<sync_optab>si): Fold into sync_old_<sync_optab><mode>.
+	(sync_old_nandsi): Fold into sync_old_nand<mode>.
+	(sync_compare_and_swap<mode>): Support SI & DI.
+	(sync_lock_test_and_set<mode>): Likewise.
+	(sync_<sync_optab><mode>): Likewise.
+	(sync_nand<mode>): Likewise.
+	(sync_new_<sync_optab><mode>): Likewise.
+	(sync_new_nand<mode>): Likewise.
+	(sync_old_<sync_optab><mode>): Likewise.
+	(sync_old_nand<mode>): Likewise.
+	(arm_sync_compare_and_swapsi): Turn into iterator on SI & DI.
+	(arm_sync_lock_test_and_setsi): Likewise.
+	(arm_sync_new_<sync_optab>si): Likewise.
+	(arm_sync_new_nandsi): Likewise.
+	(arm_sync_old_<sync_optab>si): Likewise.
+	(arm_sync_old_nandsi): Likewise.
+	(arm_sync_compare_and_swap<mode> NARROW): use sync_predtab, fix indent.
+	(arm_sync_lock_test_and_setsi<mode> NARROW): Likewise.
+	(arm_sync_new_<sync_optab><mode> NARROW): Likewise.
+	(arm_sync_new_nand<mode> NARROW): Likewise.
+	(arm_sync_old_<sync_optab><mode> NARROW): Likewise.
+	(arm_sync_old_nand<mode> NARROW): Likewise.
+
+2011-11-28  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	Backport from mainline (svn r19980):
+
+	2011-10-14  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	PR target/48126
+
+	gcc/
+	* config/arm/arm.c (arm_output_sync_loop): Move label before barrier.
+
+2011-11-28  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	Backport from mainline (svn r19979):
+
+	2011-10-14  David Alan Gilbert  <david.gilbert@linaro.org>
+
+	gcc/
+	* config/arm/arm.h (TARGET_HAVE_DMB_MCR): MCR Not available in Thumb1.
+
+
+2011-11-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	Needs to be merged upstream:
+
+	gcc/
+	* tree-vectorizer.h (vect_pattern_recog): Add new argument.
+	* tree-vect-loop.c (vect_analyze_loop_2): Update call to
+	vect_pattern_recog.
+	* tree-vect-patterns.c (widened_name_p): Pass basic block
+	info to vect_is_simple_use.
+	(vect_recog_dot_prod_pattern): Fail for basic blocks.
+	(vect_recog_widen_sum_pattern): Likewise.
+	(vect_handle_widen_op_by_const): Support basic blocks.
+	(vect_operation_fits_smaller_type,
+	vect_recog_over_widening_pattern): Likewise.
+	(vect_recog_mixed_size_cond_pattern): Support basic blocks.
+	Add printing.
+	(vect_mark_pattern_stmts): Update calls to new_stmt_vec_info.
+	(vect_pattern_recog_1): Check for reduction only in loops.
+	(vect_pattern_recog): Add new argument.  Support basic blocks.
+	* tree-vect-stmts.c (vectorizable_conversion): Pass basic block
+	info to vect_is_simple_use_1.
+	* tree-vect-slp.c (vect_get_and_check_slp_defs): Support basic
+	blocks.
+	(vect_slp_analyze_bb_1): Call vect_pattern_recog.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-pattern-1.c: New test.
+	* gcc.dg/vect/bb-slp-pattern-2.c: New test.
+
+2011-11-22  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-11-06  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vectorizer.h (vectorizable_condition): Add argument.
+	* tree-vect-loop.c (vectorizable_reduction): Fail for condition
+	in SLP.  Update calls to vectorizable_condition.
+	* tree-vect-stmts.c (vect_is_simple_cond): Add basic block info to
+	the arguments.  Pass it to vect_is_simple_use_1.
+	(vectorizable_condition): Add slp_node to the arguments.  Support
+	vectorization of basic blocks.  Fail for reduction in SLP.  Update
+	calls to vect_is_simple_cond and vect_is_simple_use.  Support SLP:
+	call vect_get_slp_defs to get vector operands.
+	(vect_analyze_stmt): Update calls to vectorizable_condition.
+	(vect_transform_stmt): Likewise.
+	* tree-vect-slp.c (vect_create_new_slp_node): Handle COND_EXPR.
+	(vect_get_and_check_slp_defs): Handle COND_EXPR.  Allow pattern
+	def stmts.
+	(vect_build_slp_tree): Handle COND_EXPR.
+	(vect_analyze_slp_instance): Push pattern statements to root node.
+	(vect_get_constant_vectors): Fix comments.  Handle COND_EXPR.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-cond-1.c: New test.
+	* gcc.dg/vect/slp-cond-1.c: New test.
+
+2011-11-22  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-10-06  Jakub Jelinek  <jakub@redhat.com>
+
+	gcc/
+	PR tree-optimization/50596
+	* tree-vectorizer.h (vect_is_simple_cond): New prototype.
+	(NUM_PATTERNS): Change to 6.
+	* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): New
+	function.
+	(vect_vect_recog_func_ptrs): Add vect_recog_mixed_size_cond_pattern.
+	(vect_mark_pattern_stmts): Don't create stmt_vinfo for def_stmt
+	if it already has one, and don't set STMT_VINFO_VECTYPE in it
+	if it is already set.
+	* tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Handle
+	COND_EXPR in pattern stmts.
+	(vect_is_simple_cond): No longer static.
+
+	gcc/testsuite:
+	PR tree-optimization/50596
+	* gcc.dg/vect/vect-cond-8.c: New test.
+
+	2011-10-07  Jakub Jelinek  <jakub@redhat.com>
+
+	gcc/
+	PR tree-optimization/50650
+	* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Don't
+	call vect_is_simple_cond here, instead fail if cond_expr isn't
+	COMPARISON_CLASS_P or if get_vectype_for_scalar_type returns NULL
+	for cond_expr's first operand.
+	* tree-vect-stmts.c (vect_is_simple_cond): Static again.
+	* tree-vectorizer.h (vect_is_simple_cond): Remove prototype.
+
+
+	gcc/
+	* tree-vect-patterns.c (vect_recog_mixed_size_cond_pattern): Reduce
+	it to integral types only.
+
+	gcc/testsuite/
+	* gcc.dg/vect/pr30858.c: Expect the error message twice for targets
+	with multiple vector sizes.
+	* gcc.dg/vect/vect-cond-8.c: Rename to...
+	* gcc.dg/vect/vect-cond-8a.c: ... this and change the type from float
+	to int.
+	* lib/target-supports.exp (check_effective_target_vect_condition):
+	Return true for NEON.
+
+2011-11-21  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r180131:
+
+	2011-10-18  Julian Brown  <julian@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_block_move_unaligned_straight)
+	(arm_adjust_block_mem, arm_block_move_unaligned_loop)
+	(arm_movmemqi_unaligned): New.
+	(arm_gen_movmemqi): Support unaligned block copies.
+
+	gcc/testsuite/
+	* lib/target-supports.exp (check_effective_target_arm_unaligned): New.
+	* gcc.target/arm/unaligned-memcpy-1.c: New.
+	* gcc.target/arm/unaligned-memcpy-2.c: New.
+	* gcc.target/arm/unaligned-memcpy-3.c: New.
+	* gcc.target/arm/unaligned-memcpy-4.c: New.
+
+	2011-09-15  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	gcc/
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): New builtin macro.
+
+2011-11-17  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-11-03  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vectorizer.h (slp_void_p): New.
+	(struct _slp_tree): Replace left and right with children.  Update
+	documentation.
+	(struct _slp_oprnd_info): New.
+	(vect_get_vec_defs): Declare.
+	(vect_get_slp_defs): Update arguments.
+	* tree-vect-loop.c (vect_create_epilog_for_reduction): Call
+	vect_get_vec_defs instead of vect_get_slp_defs.
+	(vectorizable_reduction): Likewise.
+	* tree-vect-stmts.c (vect_get_vec_defs): Remove static, add argument.
+	Update call to vect_get_slp_defs.
+	(vectorizable_conversion): Update call to vect_get_vec_defs.
+	(vectorizable_assignment, vectorizable_shift,
+	vectorizable_operation): Likewise.
+	(vectorizable_type_demotion): Call vect_get_vec_defs instead of
+	vect_get_slp_defs.
+	(vectorizable_type_promotion, vectorizable_store): Likewise.
+	(vect_analyze_stmt): Fix typo.
+	* tree-vect-slp.c (vect_free_slp_tree): Update SLP tree traversal.
+	(vect_print_slp_tree, vect_mark_slp_stmts,
+	vect_mark_slp_stmts_relevant, vect_slp_rearrange_stmts,
+	vect_detect_hybrid_slp_stmts, vect_slp_analyze_node_operations,
+	vect_schedule_slp_instance): Likewise.
+	(vect_create_new_slp_node): New.
+	(vect_create_oprnd_info, vect_free_oprnd_info): Likewise.
+	(vect_get_and_check_slp_defs): Pass information about defs using
+	oprnds_info, allow any number of operands.
+	(vect_build_slp_tree): Likewise.  Update calls to
+	vect_get_and_check_slp_defs.  Fix comments.
+	(vect_analyze_slp_instance): Move node creation to
+	vect_create_new_slp_node.
+	(vect_get_slp_defs): Allow any number of operands.
+
+	2011-11-11  Jakub Jelinek  <jakub@redhat.com>
+
+	gcc/
+	* tree-vect-slp.c (vect_free_slp_tree): Also free SLP_TREE_CHILDREN
+	vector.
+	(vect_create_new_slp_node): Don't allocate node before checking stmt
+	type.
+	(vect_free_oprnd_info): Remove FREE_DEF_STMTS argument, always
+	free def_stmts vectors and additionally free oprnd_info.
+	(vect_build_slp_tree): Adjust callers.  Call it even if
+	stop_recursion.  If vect_create_new_slp_node or
+	vect_build_slp_tree fails, properly handle freeing memory.
+	If it succeeded, clear def_stmts in oprnd_info.
+
+2011-11-02  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF mainline:
+
+	2011-11-01  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/bpabi.h (BE8_LINK_SPEC): Recognize generic-armv7 tuning.
+
+2011-11-08  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-11-08  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.11 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-11-04  Revital Eres  <revital.eres@linaro.org>
+
+	Backport from mainline -r180673:
+
+	gcc/
+	* modulo-sched.c (generate_prolog_epilog): Mark prolog
+	and epilog as BB_DISABLE_SCHEDULE.
+	(mark_loop_unsched): New function.
+	(sms_schedule): Call it.
+
+2011-10-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge FSF GCC 4.6.2 Release (svn branches/gcc-4_6-branch 180515).
+
+2011-10-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-10-18  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* doc/md.texi (vec_widen_ushiftl_hi, vec_widen_ushiftl_lo,
+	vec_widen_sshiftl_hi, vec_widen_sshiftl_lo): Document.
+	* tree-pretty-print.c (dump_generic_node): Handle WIDEN_LSHIFT_EXPR,
+	VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
+	(op_code_prio): Likewise.
+	(op_symbol_code): Handle WIDEN_LSHIFT_EXPR.
+	* optabs.c (optab_for_tree_code): Handle
+	VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
+	(init-optabs): Initialize optab codes for vec_widen_u/sshiftl_hi/lo.
+	* optabs.h (enum optab_index): Add OTI_vec_widen_u/sshiftl_hi/lo.
+	* genopinit.c (optabs): Initialize the new optabs.
+	* expr.c (expand_expr_real_2): Handle
+	VEC_WIDEN_LSHIFT_HI_EXPR and VEC_WIDEN_LSHIFT_LO_EXPR.
+	* gimple-pretty-print.c (dump_binary_rhs): Likewise.
+	* tree-vectorizer.h (NUM_PATTERNS): Increase to 8.
+	* tree.def (WIDEN_LSHIFT_EXPR, VEC_WIDEN_LSHIFT_HI_EXPR,
+	VEC_WIDEN_LSHIFT_LO_EXPR): New.
+	* cfgexpand.c (expand_debug_expr): Handle new tree codes.
+	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
+	vect_recog_widen_shift_pattern.
+	(vect_handle_widen_mult_by_const): Rename...
+	(vect_handle_widen_op_by_const): ...to this.  Handle shifts.
+	Add a new argument, update documentation.
+	(vect_recog_widen_mult_pattern): Assume that only second
+	operand can be constant.  Update call to
+	vect_handle_widen_op_by_const.
+	(vect_recog_over_widening_pattern): Fix typo.
+	(vect_recog_widen_shift_pattern): New.
+	* tree-vect-stmts.c (vectorizable_type_promotion): Handle
+	widening shifts.
+	(supportable_widening_operation): Likewise.
+	* tree-inline.c (estimate_operator_cost): Handle new tree codes.
+	* tree-vect-generic.c (expand_vector_operations_1): Likewise.
+	* tree-cfg.c (verify_gimple_assign_binary): Likewise.
+	* config/arm/neon.md (neon_vec_<US>shiftl_<mode>): New.
+	(vec_widen_<US>shiftl_lo_<mode>, neon_vec_<US>shiftl_hi_<mode>,
+	vec_widen_<US>shiftl_hi_<mode>, neon_vec_<US>shift_left_<mode>):
+	Likewise.
+	* config/arm/predicates.md (const_neon_scalar_shift_amount_operand):
+	New.
+	* config/arm/iterators.md (V_innermode): New.
+	* tree-vect-slp.c (vect_build_slp_tree): Require same shift operand
+	for widening shift.
+
+	gcc/testsuite
+	* testsuite/lib/target-supports.exp
+	(check_effective_target_vect_widen_shift): New.
+	* gcc.dg/vect/vect-widen-shift-s16.c: New.
+	* gcc.dg/vect/vect-widen-shift-s8.c: New.
+	* gcc.dg/vect/vect-widen-shift-u16.c: New.
+	* gcc.dg/vect/vect-widen-shift-u8.c: New.
+
+	2011-10-06  Jakub Jelinek  <jakub@redhat.com>
+
+	gcc/
+	* tree-vect-patterns.c (vect_pattern_recog_1): Use
+	vect_recog_func_ptr typedef for the first argument.
+	(vect_pattern_recog): Rename vect_recog_func_ptr variable
+	to vect_recog_func, use vect_recog_func_ptr typedef for it.
+
+	2011-10-16  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	PR tree-optimization/50727
+	* tree-vect-patterns.c (vect_operation_fits_smaller_type): Add
+	DEF_STMT to the list of statements to be replaced by the
+	pattern statements.
+
+	2011-10-09  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	PR tree-optimization/50635
+	* tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add
+	DEF_STMT to the list of statements to be replaced by the
+	pattern statements.
+	(vect_handle_widen_mult_by_const): Don't check TYPE_OUT.
+
+2011-10-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+	2011-10-16  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-stmts.c (vectorizable_load): For SLP without permutation
+	treat the first load of the node as the first element in its
+	interleaving chain.
+	* tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
+	necessary and possible.
+	(vect_build_slp_tree): Add new argument.  Allow load groups of any size
+	in basic blocks.  Keep all the loads for further permutation check.
+	Use the new argument to determine if there is a permutation.  Update
+	the recursive calls.
+	(vect_supported_load_permutation_p): Allow subchains of interleaving
+	chains in basic block vectorization.
+	(vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
+	Check load permutation based on the new parameter.
+	(vect_schedule_slp_instance): Don't start from the first element in
+	interleaving chain unless the loads are permuted.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-29.c: New test.
+
+2011-10-21  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF mainline:
+
+	2011-10-21  Andrew Stubbs  <ams@codesourcery.com>
+
+	PR target/50809
+
+	gcc/
+	* config/arm/driver-arm.c (vendors): Make static.
+
+2011-10-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-10-18  Andrew Stubbs  <ams@codesourcery.com>
+
+	PR tree-optimization/50717
+
+	gcc/
+	* tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type'
+	parameter.  Calculate 'type' from stmt.
+	(convert_mult_to_widen): Update call the is_widening_mult_p.
+	(convert_plusminus_to_widen): Likewise.
+
+	gcc/testsuite/
+	* gcc.dg/pr50717-1.c: New file.
+	* gcc.target/arm/wmul-12.c: Correct types.
+	* gcc.target/arm/wmul-8.c: Correct types.
+
+2011-10-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+	
+	2011-10-18  Andrew Stubbs  <ams@codesourcery.com>
+
+	* config/arm/driver-arm.c (host_detect_local_cpu): Close the file
+	before exiting.
+
+	2011-10-18  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config.host (arm*-*-linux*): Add driver-arm.o and x-arm.
+	* config/arm/arm.opt: Add 'native' processor_type and
+	arm_arch enum values.
+	* config/arm/arm.h (host_detect_local_cpu): New prototype.
+	(EXTRA_SPEC_FUNCTIONS): New define.
+	(MCPU_MTUNE_NATIVE_SPECS): New define.
+	(DRIVER_SELF_SPECS): New define.
+	* config/arm/driver-arm.c: New file.
+	* config/arm/x-arm: New file.
+	* doc/invoke.texi (ARM Options): Document -mcpu=native,
+	-mtune=native and -march=native.
+
+2011-10-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-09-09  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm-cores.def (generic-armv7-a): New architecture.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/arm.c (arm_file_start): Output .arch directive when
+	user passes -mcpu=generic-*.
+	(arm_issue_rate): Add genericv7a support.
+	* config/arm/arm.h (EXTRA_SPECS): Add asm_cpu_spec.
+	(ASM_CPU_SPEC): New define.
+	* config/arm/elf.h (ASM_SPEC): Use %(asm_cpu_spec).
+	* config/arm/semi.h (ASM_SPEC): Likewise.
+	* doc/invoke.texi (ARM Options): Document -mcpu=generic-*
+	and -mtune=generic-*.
+
+2011-10-17  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-10-10  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* modulo-sched.c (ps_reg_move_info): Add num_consecutive_stages.
+	(SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES): Delete.
+	(node_sched_params): Remove first_reg_move and nreg_moves.
+	(ps_num_consecutive_stages, extend_node_sched_params): New functions.
+	(update_node_sched_params): Move up file.
+	(print_node_sched_params): Print the stage.  Don't dump info related
+	to first_reg_move and nreg_moves.
+	(set_columns_for_row): New function.
+	(set_columns_for_ps): Move up file and use set_columns_for_row.
+	(schedule_reg_move): New function.
+	(schedule_reg_moves): Call extend_node_sched_params and
+	schedule_reg_move.  Extend size of uses bitmap.  Initialize
+	num_consecutive_stages.  Return false if a move could not be
+	scheduled.
+	(apply_reg_moves): Don't emit moves here.
+	(permute_partial_schedule): Handle register moves.
+	(duplicate_insns_of_cycles): Remove for_prolog.  Emit moves according
+	to the same stage-count test as ddg nodes.
+	(generate_prolog_epilog): Update calls accordingly.
+	(sms_schedule): Allow move-scheduling to add a new first stage.
+
+2011-10-17  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-10-10  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* modulo-sched.c (ps_insn): Adjust comment.
+	(ps_reg_move_info): New structure.
+	(partial_schedule): Add reg_moves field.
+	(SCHED_PARAMS): Use node_sched_param_vec instead of node_sched_params.
+	(node_sched_params): Turn first_reg_move into an identifier.
+	(ps_reg_move): New function.
+	(ps_rtl_insn): Cope with register moves.
+	(ps_first_note): Adjust comment and assert that the instruction
+	isn't a register move.
+	(node_sched_params): Replace with...
+	(node_sched_param_vec): ...this vector.
+	(set_node_sched_params): Adjust accordingly.
+	(print_node_sched_params): Take a partial schedule instead of a ddg.
+	Use ps_rtl_insn and ps_reg_move.
+	(generate_reg_moves): Rename to...
+	(schedule_reg_moves): ...this.  Remove rescan parameter.  Record each
+	move in the partial schedule, but don't emit it here.  Don't perform
+	register substitutions here either.
+	(apply_reg_moves): New function.
+	(duplicate_insns_of_cycles): Use register indices directly,
+	rather than finding instructions using PREV_INSN.  Use ps_reg_move.
+	(sms_schedule): Call schedule_reg_moves before committing to
+	a partial schedule.   Try the next ii if the schedule fails.
+	Use apply_reg_moves instead of generate_reg_moves.  Adjust
+	call to print_node_sched_params.  Free node_sched_param_vec
+	instead of node_sched_params.
+	(create_partial_schedule): Initialize reg_moves.
+	(free_partial_schedule): Free reg_moves.
+
+2011-10-17  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-10-10  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* modulo-sched.c (ps_insn): Replace node field with an identifier.
+	(SCHED_ASAP): Replace with..
+	(NODE_ASAP): ...this macro.
+	(SCHED_PARAMS): New macro.
+	(SCHED_TIME, SCHED_FIRST_REG_MOVE, SCHED_NREG_MOVES, SCHED_ROW)
+	(SCHED_STAGE, SCHED_COLUMN): Redefine using SCHED_PARAMS.
+	(node_sched_params): Remove asap.
+	(ps_rtl_insn, ps_first_note): New functions.
+	(set_node_sched_params): Use XCNEWVEC.  Don't copy across the
+	asap values.
+	(print_node_sched_params): Use SCHED_PARAMS and NODE_ASAP.
+	(generate_reg_moves): Pass ids to the SCHED_* macros.
+	(update_node_sched_params): Take a ps insn identifier rather than
+	a node as parameter.  Use ps_rtl_insn.
+	(set_columns_for_ps): Update for above field and SCHED_* macro changes.
+	(permute_partial_schedule): Use ps_rtl_insn and ps_first_note.
+	(optimize_sc): Update for above field and SCHED_* macro changes.
+	Update calls to try_scheduling_node_in_cycle and
+	update_node_sched_params.
+	(duplicate_insns_of_cycles): Adjust for above field and SCHED_*
+	macro changes.  Use ps_rtl_insn and ps_first_note.
+	(sms_schedule): Pass ids to the SCHED_* macros.
+	(get_sched_window): Adjust for above field and SCHED_* macro changes.
+	Use NODE_ASAP instead of SCHED_ASAP.
+	(try_scheduling_node_in_cycle): Remove node parameter.  Update
+	call to ps_add_node_check_conflicts.  Pass ids to the SCHED_*
+	macros.
+	(sms_schedule_by_order): Update call to try_scheduling_node_in_cycle.
+	(ps_insert_empty_row): Adjust for above field changes.
+	(compute_split_row): Use ids rather than nodes.
+	(verify_partial_schedule): Adjust for above field changes.
+	(print_partial_schedule): Use ps_rtl_insn.
+	(create_ps_insn): Take an id rather than a node.
+	(ps_insn_find_column): Adjust for above field changes.
+	Use ps_rtl_insn.
+	(ps_insn_advance_column): Adjust for above field changes.
+	(add_node_to_ps): Remove node parameter.  Update call to
+	create_ps_insn.
+	(ps_has_conflicts): Use ps_rtl_insn.
+	(ps_add_node_check_conflicts): Replace node parameter than an id.
+
+2011-10-17  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-10-10  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* modulo-sched.c (undo_replace_buff_elem): Delete.
+	(generate_reg_moves): Don't build and return an undo list.
+	(free_undo_replace_buff): Delete.
+	(sms_schedule): Adjust call to generate_reg_moves.
+	Don't call free_undo_replace_buff.
+
+2011-10-17  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-08-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* modulo-sched.c (get_sched_window): Use a table for the debug output.
+	Print the current ii.
+	(sms_schedule_by_order): Reduce whitespace in dump line.
+
+2011-10-17  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-08-08  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* modulo-sched.c (get_sched_window): Use just one loop for predecessors
+	and one loop for successors.  Fix upper bound of memory range.
+
+2011-10-17  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r178852:
+
+	2011-09-14  Julian Brown  <julian@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_override_options): Add unaligned_access
+	support.
+	(arm_file_start): Emit attribute for unaligned access as appropriate.
+	* config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
+	(UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
+	(insv, extzv): Add unaligned-access support.
+	(extv): Change to expander. Likewise.
+	(extzv_t1, extv_regsi): Add helpers.
+	(unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
+	(unaligned_storesi, unaligned_storehi): New.
+	(*extv_reg): New (previous extv implementation).
+	* config/arm/arm.opt (munaligned_access): Add option.
+	* config/arm/constraints.md (Uw): New constraint.
+	* expmed.c (store_bit_field_1): Adjust bitfield numbering according
+	to size of access, not size of unit, when BITS_BIG_ENDIAN !=
+	BYTES_BIG_ENDIAN. Don't use bitfield accesses for
+	volatile accesses when -fstrict-volatile-bitfields is in effect.
+	(extract_bit_field_1): Likewise.
+
+	Backport from mainline r172697:
+
+	2011-04-19  Wei Guozhi  <carrot@google.com>
+
+	PR target/47855
+	gcc/
+	* config/arm/arm-protos.h (thumb1_legitimate_address_p): New prototype.
+	* config/arm/arm.c (thumb1_legitimate_address_p): Remove the static
+	linkage.
+	* config/arm/constraints.md (Uu): New constraint.
+	* config/arm/arm.md (*arm_movqi_insn): Compute attr "length".
+
+2011-10-16  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-09-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-stmts.c (vectorizable_type_demotion): Handle basic block
+	vectorization.
+	(vectorizable_type_promotion): Likewise.
+	(vect_analyze_stmt): Call vectorizable_type_demotion and
+	vectorizable_type_promotion for basic blocks.
+	(supportable_widening_operation): Don't assume loop vectorization.
+	* tree-vect-slp.c (vect_build_slp_tree): Allow multiple types for
+	basic blocks.  Update vectorization factor for basic block
+	vectorization.
+	(vect_analyze_slp_instance): Allow multiple types for basic block
+	vectorization.  Recheck unrolling factor after construction of SLP
+	instance.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-11.c: Expect to get vectorized with 64-bit
+	vectors.
+	* gcc.dg/vect/bb-slp-27.c: New.
+	* gcc.dg/vect/bb-slp-28.c: New.
+
+
+	2011-10-04  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/testsuite/
+	* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
+	Make et_vect_multiple_sizes_saved global.
+	(check_effective_target_vect64): Make et_vect64_saved global.
+
+2011-10-13  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from mainline:
+
+	2011-10-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/predicates.md (shift_amount_operand): Remove constant
+	range check.
+	(shift_operator): Check range of constants for all shift operators.
+
+	gcc/testsuite/
+	* gcc.dg/pr50193-1.c: New file.
+	* gcc.target/arm/shiftable.c: New file.
+
+2011-10-11  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-10-11  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.10 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-10-04  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.1 (svn branches/gcc-4_6-branch 179483).
+
+2011-10-06  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-26.c: Simplify to make the basic block
+	vectorizable.
+
+	Backport from mainline:
+
+	2011-09-25  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
+	of vect_analyze_bb here.
+	(vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.
+
+	gcc/testsuite/
+	* lib/target-supports.exp (check_effective_target_vect64): New.
+	* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
+	of multiple vector sizes.
+	* gcc.dg/vect/bb-slp-26.c: New.
+
+2011-10-06  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-09-25  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-data-ref.c (dr_analyze_innermost): Add new argument.
+	Allow not simple iv if analyzing basic block.
+	(create_data_ref): Update call to dr_analyze_innermost.
+	(stmt_with_adjacent_zero_store_dr_p, ref_base_address): Likewise.
+	* tree-loop-distribution.c (generate_memset_zero): Likewise.
+	* tree-predcom.c (find_looparound_phi): Likewise.
+	* tree-data-ref.h (dr_analyze_innermost): Add new argument.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-24.c: New.
+
+
+	2011-09-15  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Allow
+	read-after-read dependencies in basic block SLP.
+
+	gcc/testsuite/
+	* gcc.dg/vect/bb-slp-25.c: New.
+
+
+	2011-04-21  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): Use
+	operand_equal_p to compare DR_BASE_ADDRESSes.
+	(vect_check_interleaving): Likewise.
+
+	gcc/testsuite/
+	* gcc.dg/vect/vect-119.c: New test.
+
+2011-10-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-09-22  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/predicates.md (expandable_comparison_operator): New
+	predicate, extracted from...
+	(arm_comparison_operator): ...here.
+	* config/arm/arm.md (cbranchsi4, cbranchsf4, cbranchdf4, cbranchdi4)
+	(cstoresi4, cstoresf4, cstoredf4, cstoredi4, movsicc, movsfcc)
+	(movdfcc): Use expandable_comparison_operator.
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-09-22  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.target/arm/cmp-1.c: New test.
+	* gcc.target/arm/cmp-2.c: Likewise.
+
+2011-10-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-09-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR target/49030
+	* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
+	* config/arm/arm.c (maybe_get_arm_condition_code): New function,
+	reusing the old code from get_arm_condition_code.  Return ARM_NV
+	for invalid comparison codes.
+	(get_arm_condition_code): Redefine in terms of
+	maybe_get_arm_condition_code.
+	* config/arm/predicates.md (arm_comparison_operator): Use
+	maybe_get_arm_condition_code.
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-09-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR target/49030
+	* gcc.dg/torture/pr49030.c: New test.
+
+2011-10-03  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline:
+
+	2011-09-13  Sevak Sargsyan <sevak.sargsyan@ispras.ru>
+
+	gcc/
+	* config/arm/neon.md (neon_vabd<mode>_2, neon_vabd<mode>_3): New
+	define_insn patterns for combine.
+
+	gcc/testsuite/
+	* gcc.target/arm/neon-combine-sub-abs-into-vabd.c: New test.
+
+2011-10-01  Revital Eres  <revital.eres@linaro.org> 
+
+	gcc/
+	Backport from mainline -r179380 and -r179381
+
+	* ddg.c (autoinc_var_is_used_p): New function.
+	(create_ddg_dep_from_intra_loop_link,
+	add_cross_iteration_register_deps): Call it.
+	* ddg.h (autoinc_var_is_used_p): Declare.
+	* modulo-sched.c (sms_schedule): Handle instructions with REG_INC.
+	(generate_reg_moves): Call autoinc_var_is_used_p.  Skip
+	instructions that do not set a register and verify no regmoves
+	are created for !single_set instructions.
+
+	gcc/testsuite/
+
+	* gcc.dg/sms-10.c: New file
+
+2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/neon.md (neon_move_lo_quad_<mode>): Delete.
+	(neon_move_hi_quad_<mode>): Likewise.
+	(move_hi_quad_<mode>, move_lo_quad_<mode>): Use subreg moves.
+
+2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-09-27  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/neon.md (neon_vget_highv16qi, neon_vget_highv8hi)
+	(neon_vget_highv4si, neon_vget_highv4sf, neon_vget_highv2di)
+	(neon_vget_lowv16qi, neon_vget_lowv8hi, neon_vget_lowv4si)
+	(neon_vget_lowv4sf, neon_vget_lowv2di): Turn into define_expands
+	that produce subreg moves.  Define using VQX iterators.
+
+2011-09-28  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-09-14  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* simplify-rtx.c (simplify_subreg): Check that the inner mode is
+	a scalar integer before applying integer-only optimisations to
+	inner arithmetic.
+
+2011-09-25  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/testsuite/
+	* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
+	Replace check_effective_target_arm_neon with
+	check_effective_target_arm_neon_ok.
+
+	Backport from mainline:
+
+	2011-09-06  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (arm_preferred_simd_mode): Check
+	TARGET_NEON_VECTORIZE_DOUBLE instead of
+	TARGET_NEON_VECTORIZE_QUAD.
+	(arm_autovectorize_vector_sizes): Likewise.
+	* config/arm/arm.opt (mvectorize-with-neon-quad): Make inverse
+	mask of mvectorize-with-neon-double.  Add RejectNegative.
+	(mvectorize-with-neon-double): New.
+
+	gcc/testsuite/
+	* lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
+	New procedure.
+	(add_options_for_quad_vectors): Replace with ...
+	(add_options_for_double_vectors): ... this.
+	* gfortran.dg/vect/pr19049.f90: Expect more printings on targets that
+	support multiple vector sizes since the vectorizer attempts to
+	vectorize with both vector sizes.
+	* gcc.dg/vect/no-vfa-vect-79.c,
+	gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
+	gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
+	gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
+	gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
+	gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
+	gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
+	* gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
+	* gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
+	gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
+	gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
+	gcc.dg/vect/vect-40.c: Likewise.
+	* gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
+	redundant.
+	* gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
+	gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
+	gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
+	gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
+	Likewise.
+	* gcc.dg/vect/vect-peel-4.c: Make ia global.
+	
+2011-09-22  Revital Eres  <revital.eres@linaro.org>
+
+	Backport from trunk -r178804:
+	* modulo-sched.c (remove_node_from_ps): Return void
+	instead of bool.
+	(optimize_sc): Adjust call to remove_node_from_ps.
+	(sms_schedule): Add print info.
+
+2011-09-16  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-09-15  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	GCC Linaro 4.6-2011.09-1 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-09-15  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	Revert:
+
+	gcc/
+	PR target/49030
+	* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
+	* config/arm/arm.c (maybe_get_arm_condition_code): New function,
+	reusing the old code from get_arm_condition_code.  Return ARM_NV
+	for invalid comparison codes.
+	(get_arm_condition_code): Redefine in terms of
+	maybe_get_arm_condition_code.
+	* config/arm/predicates.md (arm_comparison_operator): Use
+	maybe_get_arm_condition_code.
+
+	gcc/testsuite/
+	PR target/49030
+	* gcc.dg/torture/pr49030.c: New test.
+
+2011-09-13  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-09-13  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.09 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-09-12  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from FSF mainline:
+        2011-04-06  Wei Guozhi  <carrot@google.com>
+
+        PR target/47855
+	gcc/
+        * config/arm/arm.md (arm_cmpsi_insn): Compute attr "length".
+        (arm_cond_branch): Likewise.
+        (arm_cond_branch_reversed): Likewise.
+        (arm_jump): Likewise.
+        (push_multi): Likewise.
+        * config/arm/constraints.md (Py): New constraint.
+
+	2011-04-08  Wei Guozhi  <carrot@google.com>
+
+	PR target/47855
+	* config/arm/arm-protos.h (arm_attr_length_push_multi): New prototype.
+	* config/arm/arm.c (arm_attr_length_push_multi): New function.
+	* config/arm/arm.md (*push_multi): Change the length computation to
+	call a C function.
+
+2011-09-12  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from FSF mainline:
+
+        2011-08-18  Jiangning Liu  <jiangning.liu@arm.com>
+
+	gcc/
+	* config/arm/arm.md (*ior_scc_scc): Enable for Thumb2 as well.
+	(*ior_scc_scc_cmp): Likewise
+	(*and_scc_scc): Likewise.
+	(*and_scc_scc_cmp): Likewise.
+	(*and_scc_scc_nodom): Likewise.
+	(*cmp_ite0, *cmp_ite1, *cmp_and, *cmp_ior): Handle Thumb2.
+
+	gcc/testsuite
+	* gcc.target/arm/thumb2-cond-cmp-1.c: New. Make sure conditional
+	compare can be generated.
+	* gcc.target/arm/thumb2-cond-cmp-2.c: Likewise.
+	* gcc.target/arm/thumb2-cond-cmp-3.c: Likewise.
+	* gcc.target/arm/thumb2-cond-cmp-4.c: Likewise.
+
+2011-09-12  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/testsuite/
+	* gcc.target/arm/pr50099.c: Fix testcase from previous commit.
+
+2011-09-12  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP:838994
+	gcc/
+	Backport from mainline.
+
+        2011-09-06  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+        PR target/50099
+	* config/arm/iterators.md (qhs_zextenddi_cstr): New.
+	(qhs_zextenddi_op): New.
+	* config/arm/arm.md ("zero_extend<mode>di2"): Use them.
+	* config/arm/predicates.md ("arm_extendqisi_mem_op"):
+	Distinguish between ARM and Thumb2 states.
+
+	gcc/testsuite/
+	* gcc.target/arm/pr50099.c: New test.
+
+2011-09-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF mainline:
+
+	2011-09-08  Andrew Stubbs  <ams@codesourcery.com>
+
+	PR tree-optimization/50318
+
+	gcc/
+	* tree-ssa-math-opts.c (convert_plusminus_to_widen): Correct
+	typo in use of mult_rhs1 and mult_rhs2.
+
+	gcc/testsuite/
+	* gcc.target/arm/pr50318-1.c: New file.
+
+2011-09-01  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/predicates.md (shift_amount_operand): Ensure shift
+	amount is positive.
+
+	gcc/testsuite/
+	* gcc.dg/pr50193-1.c: New file.
+
+2011-09-12  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	PR target/49030
+	* config/arm/arm-protos.h (maybe_get_arm_condition_code): Declare.
+	* config/arm/arm.c (maybe_get_arm_condition_code): New function,
+	reusing the old code from get_arm_condition_code.  Return ARM_NV
+	for invalid comparison codes.
+	(get_arm_condition_code): Redefine in terms of
+	maybe_get_arm_condition_code.
+	* config/arm/predicates.md (arm_comparison_operator): Use
+	maybe_get_arm_condition_code.
+
+	gcc/testsuite/
+	PR target/49030
+	* gcc.dg/torture/pr49030.c: New test.
+
+2011-09-12  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF mainline:
+
+	2011-08-30  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (optimal_immediate_sequence_1): Make b1, b2,
+	b3 and b4 unsigned.
+
+	2011-08-30  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_gen_constant): Set can_negate correctly
+	when code is SET.
+
+	2011-08-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (struct four_ints): New type.
+	(count_insns_for_constant): Delete function.
+	(find_best_start): Delete function.
+	(optimal_immediate_sequence): New function.
+	(optimal_immediate_sequence_1): New function.
+	(arm_gen_constant): Move constant splitting code to
+	optimal_immediate_sequence.
+	Rewrite constant negation/invertion code.
+
+	gcc/testsuite/
+	* gcc.target/arm/thumb2-replicated-constant1.c: New file.
+	* gcc.target/arm/thumb2-replicated-constant2.c: New file.
+	* gcc.target/arm/thumb2-replicated-constant3.c: New file.
+	* gcc.target/arm/thumb2-replicated-constant4.c: New file.
+
+	2011-08-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm-protos.h (const_ok_for_op): Add prototype.
+	* config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
+	Remove prototype. Remove static function type.
+	* config/arm/arm.md (*arm_addsi3): Add addw/subw support.
+	Add arch attribute.
+	* config/arm/constraints.md (Pj, PJ): New constraints.
+
+	2011-04-20  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_gen_constant): Move mowv support ....
+	(const_ok_for_op): ... to here.
+
+	2011-04-20  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.c (arm_gen_constant): Remove redundant can_invert.
+
+
+
+2011-09-08  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.1 (svn branches/gcc-4_6-branch 178681).
+
+2011-09-07  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from mainline:
+
+	2011-08-04  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vectorizer.h (struct _stmt_vec_info): Add new field for
+	pattern def statement, and its access macro.
+	(NUM_PATTERNS): Set to 5.
+	* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
+	pattern def statement.
+	(vect_transform_loop): Likewise.
+	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
+	function vect_recog_over_widening_pattern ().
+	(vect_operation_fits_smaller_type): New function.
+	(vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
+	Likewise.
+	(vect_pattern_recog_1): Move the code that marks pattern
+	statements to vect_mark_pattern_stmts (), and call it.  Update
+	documentation.
+	* tree-vect-stmts.c (vect_supportable_shift): New function.
+	(vect_analyze_stmt): Handle pattern def statement.
+	(new_stmt_vec_info): Initialize pattern def statement.
+
+	gcc/testsuite/
+	* gcc.dg/vect/vect-over-widen-1.c: New test.
+	* gcc.dg/vect/vect-over-widen-2.c: New test.
+	* gcc.dg/vect/vect-over-widen-3.c: New test.
+	* gcc.dg/vect/vect-over-widen-4.c: New test.
+
+
+	2011-08-09  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	PR tree-optimization/50014
+	* tree-vect-loop.c (vectorizable_reduction): Get def type before
+	calling vect_get_vec_def_for_stmt_copy ().
+
+	gcc/testsuite/
+	PR tree-optimization/50014
+	* gcc.dg/vect/pr50014.c: New test.
+
+
+	2011-08-11  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	PR tree-optimization/50039
+	* tree-vect-patterns.c (vect_operation_fits_smaller_type): Check
+	that DEF_STMT has a stmt_vec_info.
+
+	gcc/testsuite/
+	PR tree-optimization/50039
+	* gcc.dg/vect/vect.exp: Run no-tree-fre-* tests with -fno-tree-fre.
+	* gcc.dg/vect/no-tree-fre-pr50039.c: New test.
+
+
+	2011-09-04  Jakub Jelinek  <jakub@redhat.com>
+		    Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	PR tree-optimization/50208
+	* tree-vect-patterns.c (vect_handle_widen_mult_by_const): Add an
+	argument.  Check that def_stmt is inside the loop.
+	(vect_recog_widen_mult_pattern): Update calls to
+	vect_handle_widen_mult_by_cons.
+	(vect_operation_fits_smaller_type): Check that def_stmt is
+	inside the loop.
+
+	gcc/testsuite/
+	PR tree-optimization/50208
+	* gcc.dg/vect/no-fre-pre-pr50208.c: New test.
+	* gcc.dg/vect/vect.exp: Run no-fre-pre-*.c tests with
+	-fno-tree-fre -fno-tree-pre.
+
+2011-09-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline.
+	2011-08-26  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+
+	* config/arm/cortex-a9.md ("cortex_a9_mult_long"): New.
+	("cortex_a9_multiply_long"): New and use above.  Handle all
+	long multiply cases.
+	("cortex_a9_multiply"): Handle smmul and smmulr.
+	("cortex_a9_mac"): Handle smmla.
+
+2011-09-05  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	2011-08-12  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	PR target/48328
+	* config/arm/arm.h (CASE_VECTOR_SHORTEN_MODE): Fix distance
+	for tbh instructions.
+
+2011-08-26  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-08-26  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* df-problems.c (df_note_bb_compute): Pass uses rather than defs
+	to df_set_dead_notes_for_mw.
+
+2011-08-25  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF mainline:
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (is_widening_mult_rhs_p): Handle constants
+	beyond conversions.
+	(convert_mult_to_widen): Convert constant inputs to the right type.
+	(convert_plusminus_to_widen): Don't automatically reject inputs that
+	are not an SSA_NAME.
+	Convert constant inputs to the right type.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-11.c: New file.
+	* gcc.target/arm/wmul-12.c: New file.
+	* gcc.target/arm/wmul-13.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (convert_plusminus_to_widen): Convert add_rhs
+	to the correct type.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-10.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (convert_mult_to_widen): Better handle
+	unsigned inputs of different modes.
+	(convert_plusminus_to_widen): Likewise.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-9.c: New file.
+	* gcc.target/arm/wmul-bitfield-2.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (is_widening_mult_rhs_p): Add new argument
+	'type'.
+	Use 'type' from caller, not inferred from 'rhs'.
+	Don't reject non-conversion statements. Do return lhs in this case.
+	(is_widening_mult_p): Add new argument 'type'.
+	Use 'type' from caller, not inferred from 'stmt'.
+	Pass type to is_widening_mult_rhs_p.
+	(convert_mult_to_widen): Pass type to is_widening_mult_p.
+	(convert_plusminus_to_widen): Likewise.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-8.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (is_widening_mult_p): Remove FIXME.
+	Ensure the the larger type is the first operand.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-7.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (convert_mult_to_widen): Convert
+	unsupported unsigned multiplies to signed.
+	(convert_plusminus_to_widen): Likewise.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-6.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* tree-ssa-math-opts.c (convert_plusminus_to_widen): Permit a single
+	conversion statement separating multiply-and-accumulate.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-5.c: New file.
+	* gcc.target/arm/no-wmla-1.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.md (maddhidi4): Remove '*' from name.
+	* expr.c (expand_expr_real_2): Use find_widening_optab_handler.
+	* optabs.c (find_widening_optab_handler_and_mode): New function.
+	(expand_widen_pattern_expr): Use find_widening_optab_handler.
+	(expand_binop_directly): Likewise.
+	(expand_binop): Likewise.
+	* optabs.h (find_widening_optab_handler): New macro define.
+	(find_widening_optab_handler_and_mode): New prototype.
+	* tree-cfg.c (verify_gimple_assign_binary): Adjust WIDEN_MULT_EXPR
+	type precision rules.
+	(verify_gimple_assign_ternary): Likewise for WIDEN_MULT_PLUS_EXPR.
+	* tree-ssa-math-opts.c (build_and_insert_cast): New function.
+	(is_widening_mult_rhs_p): Allow widening by more than one mode.
+	Explicitly disallow mis-matched input types.
+	(convert_mult_to_widen): Use find_widening_optab_handler, and cast
+	input types to fit the new handler.
+	(convert_plusminus_to_widen): Likewise.
+
+	gcc/testsuite/
+	* gcc.target/arm/wmul-bitfield-1.c: New file.
+
+	2011-08-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* expr.c (expand_expr_real_2): Use widening_optab_handler.
+	* genopinit.c (optabs): Use set_widening_optab_handler for $N.
+	(gen_insn): $N now means $a must be wider than $b, not consecutive.
+	* optabs.c (widened_mode): New function.
+	(expand_widen_pattern_expr): Use widening_optab_handler.
+	(expand_binop_directly): Likewise.
+	(expand_binop): Likewise.
+	* optabs.h (widening_optab_handlers): New struct.
+	(optab_d): New member, 'widening'.
+	(widening_optab_handler): New function.
+	(set_widening_optab_handler): New function.
+	* tree-ssa-math-opts.c (convert_mult_to_widen): Use
+	widening_optab_handler.
+	(convert_plusminus_to_widen): Likewise.
+
+2011-08-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP:823548
+	gcc/
+	* config/arm/arm.c (arm_init_neon_builtins): Use 
+	n_operands instead of n_generator_args.
+
+2011-08-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	LP:823548
+	Backport from mainline
+	2011-04-18  Jie Zhang  <jie@codesourcery.com>
+	Richard Earnshaw  <rearnsha@arm.com>
+
+	* arm.c (neon_builtin_type_bits): Remove.
+	(typedef enum neon_builtin_mode): New.
+	(T_MAX): Don't define.
+	(typedef enum neon_builtin_datum): Remove bits, codes[],
+	num_vars and base_fcode.  Add mode, code and fcode.
+	(VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9
+	VAR10): Change accordingly.
+	(neon_builtin_data[]): Change accordingly
+	(arm_init_neon_builtins): Change accordingly.
+	(neon_builtin_compare): Remove.
+	(locate_neon_builtin_icode): Remove.
+	(arm_expand_neon_builtin): Change accordingly.
+
+	* arm.h (enum arm_builtins): Move to ...
+	* arm.c (enum arm_builtins): ... here; and rearrange builtin code.
+
+	* arm.c (arm_builtin_decl): Declare.
+	(TARGET_BUILTIN_DECL): Define.
+	(enum arm_builtins): Correct ARM_BUILTIN_MAX.
+	(arm_builtin_decls[]): New.
+	(arm_init_neon_builtins): Store builtin declarations in
+	arm_builtin_decls[].
+	(arm_init_tls_builtins): Likewise.
+	(arm_init_iwmmxt_builtins): Likewise.  Refactor initialization code.
+	(arm_builtin_decl): New.
+
+2011-08-18  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-08-18  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/arm.c (arm_rtx_costs_1): Don't modify the costs of SET.
+	(arm_size_rtx_costs): Likewise.
+
+2011-08-18  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-08-12  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	* config/arm/arm.c (get_label_padding): New function.
+	(create_fix_barrier, arm_reorg): Use it.
+
+2011-08-16  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-08-16  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.08 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-08-15  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* config/rs6000/rs6000.c (paired_expand_vector_init): Don't create
+	CONST_VECTORs with symbolic elements.
+	(rs6000_expand_vector_init): Likewise.
+
+2011-08-15  Michael Hope  <michael.hope@linaro.org>
+
+	Merge from FSF GCC 4.6.1 (svn branches/gcc-4_6-branch 177703).
+
+2011-08-15  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r177357
+
+	gcc/testsuite/
+	2011-08-04  Ian Bolton  <ian.bolton@arm.com>
+
+	* gcc.target/arm/vfp-1.c: no large negative offsets on Thumb2.
+
+2011-08-10  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+        2011-07-28  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	* config/arm/vfp.md ("*movdf_vfp"): Handle the VFP constraints
+	before the core constraints. Adjust attributes.
+	(*thumb2_movdf_vfp"): Likewise.
+
+2011-08-09  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	Backport from trunk -r176972:
+
+	* ddg.c (create_ddg_dep_from_intra_loop_link): Remove
+	the creation of anti-dep edge from a branch.
+	(add_cross_iteration_register_deps):
+	Create anti-dep edge from a branch.
+
+2011-08-09  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	Backport from trunk -r177235.
+	* modulo-sched.c (calculate_stage_count,
+	calculate_must_precede_follow, get_sched_window,
+	try_scheduling_node_in_cycle, remove_node_from_ps):
+	Add declaration.
+	(update_node_sched_params, set_must_precede_follow, optimize_sc):
+	New functions.
+	(reset_sched_times): Call update_node_sched_params.
+	(sms_schedule):	Call optimize_sc.
+	(get_sched_window): Change function arguments.
+	(sms_schedule_by_order): Update call to	get_sched_window.
+	Call set_must_precede_follow.
+	(calculate_stage_count): Add function argument.
+
+2011-07-31  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	Backport from trunk -r176970:
+
+	* modulo-sched.c: Change comment.
+	(reset_sched_times): Fix print message.
+	(print_partial_schedule): Add print info.
+
+
+2011-07-21  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	gcc/
+	Backport from mainline:
+
+	2011-07-21  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK.
+
+2011-07-21  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	PR middle-end/49736
+	* expr.c (all_zeros_p): Undo bogus part of last change.
+
+2011-07-21  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	Backport from mainline:
+	gcc/cp/
+	2011-07-13  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* typeck2.c (split_nonconstant_init_1): Pass the initializer directly,
+	rather than a pointer to it.  Return true if the whole of the value
+	was initialized by the generated statements.  Use
+	complete_ctor_at_level_p instead of count_type_elements.
+
+	gcc/
+	2011-07-13  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* tree.h (categorize_ctor_elements): Remove comment.  Fix long line.
+	(count_type_elements): Delete.
+	(complete_ctor_at_level_p): Declare.
+	* expr.c (flexible_array_member_p): New function, split out from...
+	(count_type_elements): ...here.  Make static.  Replace allow_flexarr
+	parameter with for_ctor_p.  When for_ctor_p is true, return the
+	number of elements that should appear in the top-level constructor,
+	otherwise return an estimate of the number of scalars.
+	(categorize_ctor_elements): Replace p_must_clear with p_complete.
+	(categorize_ctor_elements_1): Likewise.  Use complete_ctor_at_level_p.
+	(complete_ctor_at_level_p): New function, borrowing union logic
+	from old categorize_ctor_elements_1.
+	(mostly_zeros_p): Return true if the constructor is not complete.
+	(all_zeros_p): Update call to categorize_ctor_elements.
+	* gimplify.c (gimplify_init_constructor): Update call to
+	categorize_ctor_elements.  Don't call count_type_elements.
+	Unconditionally prevent clearing for variable-sized types,
+	otherwise rely on categorize_ctor_elements to detect
+	incomplete initializers.
+
+	gcc/testsuite/
+	2011-07-13  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	* gcc.target/arm/pr48183.c: New test.
+
+2011-07-18  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-07-18  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.07-0 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-07-15  Michael Hope  <michael.hope@linaro.org>
+
+	Backport from mainline r174540
+	LP: #807573
+
+	gcc/
+	2011-06-01  Richard Sandiford  <rdsandiford@googlemail.com>
+
+	PR rtl-optimization/48830
+	PR rtl-optimization/48808
+	PR rtl-optimization/48792
+	* reload.c (push_reload): Check contains_reg_of_mode.
+	* reload1.c (strip_paradoxical_subreg): New function.
+	(gen_reload_chain_without_interm_reg_p): Use it to handle
+	paradoxical subregs.
+	(emit_output_reload_insns, gen_reload): Likewise.
+
+	gcc/testsuite/
+	2011-06-01  Eric Botcazou  <ebotcazou@adacore.com>
+		    Hans-Peter Nilsson  <hp@axis.com>
+
+	PR rtl-optimization/48830
+	* gcc.target/sparc/ultrasp12.c: New test.
+
+2011-07-15  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-06-29  Nathan Sidwell  <nathan@codesourcery.com>
+
+	* config/arm/unwind-arm.c (enum __cxa_type_match_result): New.
+	(cxa_type_match): Correct declaration.
+	(__gnu_unwind_pr_common): Reconstruct
+	additional indirection when __cxa_type_match returns
+	succeeded_with_ptr_to_base.
+
+	libstdc++-v3/
+	Backport from mainline:
+
+	2011-06-29  Nathan Sidwell  <nathan@codesourcery.com>
+
+	* libsupc++/eh_arm.c (__cxa_type_match): Construct address of
+	thrown object here.  Return succeded_with_ptr_to_base for all
+	pointer cases.
+
+2011-07-15  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-03-22  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* combine.c (simplify_set): Try harder to find the best CC mode when
+	simplifying a nested COMPARE on the RHS.
+
+2011-07-15  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-05  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* ifcvt.c (cond_exec_process_insns): Disallow converting a block
+	that contains the prologue.
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-04-01  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* gcc.c-torture/compile/20110401-1.c: New test.
+
+2011-07-13  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	Backport from mainline:
+	gcc/
+	2011-07-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* reload1.c (choose_reload_regs): Use mode sizes to check whether
+	an old reload register completely defines the required value.
+
+	gcc/testsuite/
+	2011-07-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.target/arm/neon-modes-3.c: New test.
+
+2011-07-11  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	2011-06-22  Dmitry Plotnikov  <dplotnikov@ispras.ru>
+	Dmitry Melnik  <dm@ispras.ru>
+
+	* config/arm/arm.c (neon_immediate_valid_for_shift): New function.
+	(neon_output_shift_immediate): Ditto.
+	* config/arm/arm-protos.h (neon_immediate_valid_for_shift): New
+	prototype.
+	(neon_output_shift_immediate): Ditto.
+	* config/arm/neon.md (vashl<mode>3): Modified constraint.
+	(vashr<mode>3_imm): New insn pattern.
+	(vlshr<mode>3_imm): Ditto.
+	(vashr<mode>3): Modified constraint.
+	(vlshr<mode>3): Ditto.
+	* config/arm/predicates.md (imm_for_neon_lshift_operand): New
+	predicate.
+	(imm_for_neon_rshift_operand): Ditto.
+	(imm_lshift_or_reg_neon): Ditto.
+	(imm_rshift_or_reg_neon): Ditto.
+
+	* optabs.c (init_optabs): Init optab codes for vashl, vashr, vlshr.
+
+	gcc/testsuite
+	2011-06-22  Dmitry Plotnikov  <dplotnikov@ispras.ru>
+	Dmitry Melnik  <dm@ispras.ru>
+
+	* gcc.target/arm/neon-vshr-imm-1.c: New testcase.
+	* gcc.target/arm/neon-vshl-imm-1.c: New testcase.
+	* gcc.target/arm/neon-vlshr-imm-1.c: New testcase.
+
+2011-07-11  Revital Eres  <revital.eres@linaro.org>
+
+	Backport from mainline -r175091
+	gcc/
+	* modulo-sched.c (struct ps_insn): Remove row_rest_count
+	field.
+	(struct partial_schedule): Add rows_length field.
+	(verify_partial_schedule): Check rows_length.
+	(ps_insert_empty_row): Handle rows_length.
+	(create_partial_schedule): Likewise.
+	(free_partial_schedule): Likewise.
+	(reset_partial_schedule): Likewise.
+	(create_ps_insn): Remove rest_count argument.
+	(remove_node_from_ps): Update rows_length.
+	(add_node_to_ps): Update rows_length and call create_ps_insn without
+	passing row_rest_count.
+	(rotate_partial_schedule): Update rows_length.
+
+2011-07-11  Revital Eres  <revital.eres@linaro.org>
+
+	Backport from mainline -r175090.
+	gcc/
+	* ddg.c (add_intra_loop_mem_dep): New function.
+	(build_intra_loop_deps): Call it.
+	gcc/testsuite
+	* gcc.dg/sms-9.c: New file.
+
+2011-07-11  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from FSF:
+	2011-06-16  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vectorizer.h (vect_recog_func_ptr): Change the first
+	argument to be a VEC of statements.
+	* tree-vect-loop.c (vect_determine_vectorization_factor): Remove the
+	assert that pattern statements have to have their vector type set.
+	* tree-vect-patterns.c (vect_recog_widen_sum_pattern):
+	Change the first argument to be a VEC of statements.  Update
+	documentation.
+	(vect_recog_dot_prod_pattern, vect_recog_pow_pattern): Likewise.
+	(vect_handle_widen_mult_by_const): New function.
+	(vect_recog_widen_mult_pattern):  Change the first argument to be a
+	VEC of statements.  Update documentation.  Check that the constant is
+	INTEGER_CST.  Support multiplication by a constant that fits an
+	intermediate type - call vect_handle_widen_mult_by_const.
+	(vect_pattern_recog_1): Update vect_recog_func_ptr and its
+	call.  Handle additional pattern statements if necessary.
+
+	gcc/testsuite/
+	* gcc.dg/vect/vect-widen-mult-half-u8.c: New test.
+
+	and
+	2011-06-30  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
+	both pattern and original statements if necessary.
+	(vect_transform_loop): Likewise.
+	* tree-vect-patterns.c (vect_pattern_recog): Update documentation.
+	* tree-vect-stmts.c (vect_mark_relevant): Add new argument.
+	Mark the pattern statement only if the original statement doesn't
+	have its own uses.
+	(process_use): Call vect_mark_relevant with additional parameter.
+	(vect_mark_stmts_to_be_vectorized): Likewise.
+	(vect_get_vec_def_for_operand): Use vectorized pattern statement.
+	(vect_analyze_stmt): Handle both pattern and original statements
+	if necessary.
+	(vect_transform_stmt): Don't store vectorized pattern statement
+	in the original statement.
+	(vect_is_simple_use_1): Use related pattern statement only if the
+	original statement is irrelevant.
+	* tree-vect-slp.c (vect_get_and_check_slp_defs): Likewise.
+
+	gcc/testsuite/
+	* gcc.dg/vect/slp-widen-mult-half.c: New test.
+	* gcc.dg/vect/vect-widen-mult-half.c: New test.
+
+2011-07-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* builtins.c (get_object_alignment): Fix comment.
+	* fold-const.c (get_pointer_modulus_and_residue): Remove
+	allow_func_align.  Use get_object_alignment.
+	(fold_binary_loc): Update caller.
+
+2011-07-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-06-29  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR tree-optimization/49545
+	* builtins.c (get_object_alignment_1): Update function comment.
+	Do not use DECL_ALIGN for functions, but test
+	TARGET_PTRMEMFUNC_VBIT_LOCATION instead.
+	* fold-const.c (get_pointer_modulus_and_residue): Don't check
+	for functions here.
+	* tree-ssa-ccp.c (get_value_from_alignment): Likewise.
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-06-29  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.dg/torture/pr49169.c: Restrict to ARM and MIPS targets.
+
+2011-07-07  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-07-27  Richard Guenther  <rguenther@suse.de>
+
+	PR tree-optimization/49169
+	* fold-const.c (get_pointer_modulus_and_residue): Don't rely on
+	the alignment of function decls.
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-07-27  Michael Hope  <michael.hope@linaro.org>
+		    Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR tree-optimization/49169
+	* gcc.dg/torture/pr49169.c: New test.
+
+2011-07-01  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6.1 (svn branches/gcc-4_6-branch 175677).
+
+2011-07-03  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from FSF:
+	2011-06-12  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-data-refs.c (vect_peeling_hash_get_most_frequent):
+	Take number of iterations to peel into account for equally frequent
+	misalignment values.
+
+2011-07-01  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline.
+	LP 744754
+        2011-04-17  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	* config/arm/arm.c (neon_struct_mem_operand):
+	Support POST_INC/PRE_DEC memory operands.
+
+2011-06-28  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline.
+	LP 791327
+	gcc/
+        2011-06-09  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	PR target/49335
+	* config/arm/predicates.md (add_operator): New.
+	* config/arm/arm.md ("*arith_shiftsi"): Fix for SP reg usage
+	in Thumb2.
+
+2011-06-28  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline.
+	gcc/
+	2011-06-24  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	PR target/49385
+	* config/arm/thumb2.md (*thumb2_movhi_insn): Make sure atleast
+	one of the operands is a register.
+
+2011-06-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from FSF:
+
+	2011-06-07  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be
+	a pointer.
+	* tree-vect-patterns.c (vect_recog_widen_sum_pattern,
+	vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern,
+	vect_recog_pow_pattern): Likewise.
+	(vect_pattern_recog_1): Remove declaration.
+	(widened_name_p): Remove declaration.  Add new argument to specify
+	whether to check that both types are either signed or unsigned.
+	(vect_recog_widen_mult_pattern): Update documentation.  Handle
+	unsigned patterns and multiplication by constants.
+	(vect_pattern_recog_1): Update vect_recog_func references.  Use
+	statement information from the statement returned from pattern
+	detection functions.
+	(vect_pattern_recog): Update vect_recog_func reference.
+	* tree-vect-stmts.c (vectorizable_type_promotion): For widening
+	multiplication by a constant use the type of the other operand.
+
+	gcc/testsuite
+	* lib/target-supports.exp
+	(check_effective_target_vect_widen_mult_qi_to_hi):
+	Add NEON as supporting target.
+	(check_effective_target_vect_widen_mult_hi_to_si): Likewise.
+	(check_effective_target_vect_widen_mult_qi_to_hi_pattern): New.
+	(check_effective_target_vect_widen_mult_hi_to_si_pattern): New.
+	* gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized
+	using widening multiplication on targets that support it.
+	* gcc.dg/vect/vect-widen-mult-u16.c: Likewise.
+	* gcc.dg/vect/vect-widen-mult-const-s16.c: New test.
+	* gcc.dg/vect/vect-widen-mult-const-u16.c: New test.
+
+	and
+
+	2011-06-15  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-vect-loop-manip.c (remove_dead_stmts_from_loop): Remove.
+	(slpeel_tree_peel_loop_to_edge): Don't call
+	remove_dead_stmts_from_loop.
+	* tree-vect-loop.c (vect_determine_vectorization_factor): Don't
+	remove irrelevant pattern statements.  For irrelevant statements
+	check if it is the last statement of a detected pattern, use
+	corresponding pattern statement instead.
+	(destroy_loop_vec_info): No need to remove pattern statements,
+	only free stmt_vec_info.
+	(vect_transform_loop): For irrelevant statements check if it is
+	the last statement of a detected pattern, use corresponding
+	pattern statement instead.
+	* tree-vect-patterns.c (vect_pattern_recog_1): Don't insert
+	pattern statements.  Set basic block for the new statement.
+	(vect_pattern_recog): Update documentation.
+	* tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan
+	operands of pattern statements.
+	(vectorizable_call): Fix printing.  In case of a pattern statement
+	use the lhs of the original statement when creating a dummy
+	statement to replace the original call.
+	(vect_analyze_stmt): For irrelevant statements check if it is
+	the last statement of a detected pattern, use corresponding
+	pattern statement instead.
+	* tree-vect-slp.c (vect_schedule_slp_instance): For pattern
+	statements use gsi of the original statement.
+
+	and
+	2011-06-21  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/49478
+	gcc/
+
+	* tree-vect-loop.c (vectorizable_reduction): Handle DOT_PROD_EXPR
+	with constant operand.
+
+2011-06-28  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	Chung-Lin Tang  <cltang@codesourcery.com>
+	Richard Earnshaw  <rearnsha@arm.com>
+
+	PR target/48250
+	* config/arm/arm.c (arm_legitimize_reload_address): Update cases
+	to use sign-magnitude offsets. Reject unsupported unaligned
+	cases. Add detailed description in comments.
+	* config/arm/arm.md (reload_outdf): Disable for ARM mode; change
+	condition from TARGET_32BIT to TARGET_ARM.
+
+	Chung-Lin Tang  <cltang@codesourcery.com>
+
+	* config/arm/arm.c (arm_legitimize_reload_address): For NEON
+	quad-word modes, reduce to 9-bit index range when above 1016
+	limit.
+
+2011-06-20  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	Backport from mainline.
+	2011-06-03  Julian Brown  <julian@codesourcery.com>
+
+	* config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100)
+	(strongarm1110): Use strongarm tuning.
+	* config/arm/arm-protos.h (tune_params): Add max_insns_skipped
+	field.
+	* config/arm/arm.c (arm_strongarm_tune): New.
+	(arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
+	(arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune)
+	(arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field
+	setting, using previous defaults or 1 for Cortex-A5.
+	(arm_option_override): Set max_insns_skipped from current tuning.
+
+2011-06-20  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	Backport from mainline.
+	2011-06-02  Julian Brown  <julian@codesourcery.com>
+
+	* config/arm/arm-cores.def (cortex-a5): Use cortex_a5 tuning.
+	* config/arm/arm.c (arm_cortex_a5_branch_cost): New.
+	(arm_cortex_a5_tune): New.
+
+	2011-06-02  Julian Brown  <julian@codesourcery.com>
+
+	* config/arm/arm-protos.h (tune_params): Add branch_cost hook.
+	* config/arm/arm.c (arm_default_branch_cost): New.
+	(arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
+	(arm_v6t2_tune, arm_cortex_tune, arm_cortex_a9_tune)
+	(arm_fa726_tune): Set branch_cost field using
+	arm_default_branch_cost.
+	* config/arm/arm.h (BRANCH_COST): Use branch_cost hook from
+	current_tune structure.
+	* dojump.c (tm_p.h): Include file.
+
+        2011-06-02  Julian Brown  <julian@codesourcery.com>
+
+	* config/arm/arm-cores.def (arm1156t2-s, arm1156t2f-s): Use v6t2
+	tuning.
+	(cortex-a5, cortex-a8, cortex-a15, cortex-r4, cortex-r4f, cortex-m4)
+	(cortex-m3, cortex-m1, cortex-m0): Use cortex tuning.
+	* config/arm/arm-protos.h (tune_params): Add prefer_constant_pool
+	field.
+	* config/arm/arm.c (arm_slowmul_tune, arm_fastmul_tune)
+	(arm_xscale_tune, arm_9e_tune, arm_cortex_a9_tune)
+	(arm_fa726te_tune): Add prefer_constant_pool setting.
+	(arm_v6t2_tune, arm_cortex_tune): New.
+	* config/arm/arm.h (TARGET_USE_MOVT): Make dependent on
+	prefer_constant_pool setting.
+
+2011-06-20  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	Backport from mainline
+	2011-06-01  Paul Brook  <paul@cpodesourcery.com>
+	* config/arm/arm-cores.def: Add cortex-r5.  Add DIV flags to
+	Cortex-A15.
+	* config/arm/arm-tune.md: Regenerate.
+	* config/arm/arm.c (FL_DIV): Rename...
+	(FL_THUMB_DIV): ... to this.
+	(FL_ARM_DIV): Define.
+	(FL_FOR_ARCH7R, FL_FOR_ARCH7M): Use FL_THUMB_DIV.
+	(arm_arch_hwdiv): Remove.
+	(arm_arch_thumb_hwdiv, arm_arch_arm_hwdiv): New variables.
+	(arm_issue_rate): Add cortexr5.
+	* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set
+	__ARM_ARCH_EXT_IDIV__.
+	(TARGET_IDIV): Define.
+	(arm_arch_hwdiv): Remove.
+	(arm_arch_arm_hwdiv, arm_arch_thumb_hwdiv): New prototypes.
+	* config/arm/arm.md (tune_cortexr4): Add cortexr5.
+	(divsi3, udivsi3): New patterns.
+	* config/arm/thumb2.md (divsi3, udivsi3): Remove.
+	* doc/invoke.texi: Document ARM -mcpu=cortex-r5
+
+2011-06-14  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-06-14  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.06-0 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-06-13  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline:
+	gcc/
+	2011-06-13  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	PR target/48454
+	* config/arm/neon.md (vec_pack_trunc<mode>): Set the lengths
+	correctly for the case with Quad vectors.
+
+2011-06-10  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from mainline:
+	gcc/
+        2011-06-02  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+	* config/arm/neon.md (orndi3_neon): Actually split it.
+
+
+2011-06-10  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+        Backport from mainline.
+	gcc/
+        2011-05-26  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	* config/arm/neon.md ("orn<mode>3_neon"): Canonicalize not.
+	("orndi3_neon"): Likewise.
+	("bic<mode>3_neon"): Likewise.
+
+	gcc/testsuite
+	2011-05-26  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	* gcc.target/arm/neon-vorn-vbic.c: New test.
+
+2011-06-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-06-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* config/arm/arm.md (*maddhidi4tb, *maddhidi4tt): New define_insns.
+	(*maddhisi4tb, *maddhisi4tt): New define_insns.
+
+	gcc/testsuite/
+	* gcc.target/arm/smlatb-1.c: New file.
+	* gcc.target/arm/smlatt-1.c: New file.
+	* gcc.target/arm/smlaltb-1.c: New file.
+	* gcc.target/arm/smlaltt-1.c: New file.
+
+2011-06-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-06-07  Bernd Schmidt  <bernds@codesourcery.com>
+		    Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening
+	multiplies.
+	* doc/md.texi (Canonicalization of Instructions): Document widening
+	multiply canonicalization.
+
+	gcc/testsuite/
+	* gcc.target/arm/mla-2.c: New test.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* gimple.c (gimple_build_call_internal_1): Add missing call to
+	gimple_call_reset_alias_info.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.dg/vect/vect-strided-u16-i3.c: New test.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* lib/target-supports.exp (check_effective_target_vect_strided):
+	Replace with...
+	(check_effective_target_vect_strided2)
+	(check_effective_target_vect_strided3)
+	(check_effective_target_vect_strided4)
+	(check_effective_target_vect_strided8): ...these new functions.
+
+	* gcc.dg/vect/O3-pr39675-2.c: Update accordingly.
+	* gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c: Likewise.
+	* gcc.dg/vect/fast-math-slp-27.c: Likewise.
+	* gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: Likewise.
+	* gcc.dg/vect/pr37539.c: Likewise.
+	* gcc.dg/vect/slp-11a.c: Likewise.
+	* gcc.dg/vect/slp-11b.c: Likewise.
+	* gcc.dg/vect/slp-11c.c: Likewise.
+	* gcc.dg/vect/slp-12a.c: Likewise.
+	* gcc.dg/vect/slp-12b.c: Likewise.
+	* gcc.dg/vect/slp-18.c: Likewise.
+	* gcc.dg/vect/slp-19a.c: Likewise.
+	* gcc.dg/vect/slp-19b.c: Likewise.
+	* gcc.dg/vect/slp-21.c: Likewise.
+	* gcc.dg/vect/slp-23.c: Likewise.
+	* gcc.dg/vect/vect-cselim-1.c: Likewise.
+
+	* gcc.dg/vect/fast-math-vect-complex-3.c: Use vect_stridedN
+	instead of vect_interleave && vect_extract_even_odd.
+	* gcc.dg/vect/no-scevccp-outer-10a.c: Likewise.
+	* gcc.dg/vect/no-scevccp-outer-10b.c: Likewise.
+	* gcc.dg/vect/no-scevccp-outer-20.c: Likewise.
+	* gcc.dg/vect/vect-1.c: Likewise.
+	* gcc.dg/vect/vect-10.c: Likewise.
+	* gcc.dg/vect/vect-98.c: Likewise.
+	* gcc.dg/vect/vect-107.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-mult.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u16-i2.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u16-i4.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u16-mult.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u32-mult.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u8-i2-gap.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u8-i8-gap2.c: Likewise.
+	* gcc.dg/vect/vect-strided-a-u8-i8-gap7.c: Likewise.
+	* gcc.dg/vect/vect-strided-float.c: Likewise.
+	* gcc.dg/vect/vect-strided-mult-char-ls.c: Likewise.
+	* gcc.dg/vect/vect-strided-mult.c: Likewise.
+	* gcc.dg/vect/vect-strided-same-dr.c: Likewise.
+	* gcc.dg/vect/vect-strided-u16-i2.c: Likewise.
+	* gcc.dg/vect/vect-strided-u16-i4.c: Likewise.
+	* gcc.dg/vect/vect-strided-u32-i4.c: Likewise.
+	* gcc.dg/vect/vect-strided-u32-i8.c: Likewise.
+	* gcc.dg/vect/vect-strided-u32-mult.c: Likewise.
+	* gcc.dg/vect/vect-strided-u8-i2-gap.c: Likewise.
+	* gcc.dg/vect/vect-strided-u8-i2.c: Likewise.
+	* gcc.dg/vect/vect-strided-u8-i8-gap2.c: Likewise.
+	* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Likewise.
+	* gcc.dg/vect/vect-strided-u8-i8-gap7.c: Likewise.
+	* gcc.dg/vect/vect-strided-u8-i8.c: Likewise.
+	* gcc.dg/vect/vect-vfa-03.c: Likewise.
+
+	* gcc.dg/vect/no-scevccp-outer-18.c: Add vect_stridedN to the
+	target condition.
+	* gcc.dg/vect/pr30843.c: Likewise.
+	* gcc.dg/vect/pr33866.c: Likewise.
+	* gcc.dg/vect/slp-reduc-6.c: Likewise.
+	* gcc.dg/vect/vect-strided-store-a-u8-i2.c: Likewise.
+	* gcc.dg/vect/vect-strided-store-u16-i4.c: Likewise.
+	* gcc.dg/vect/vect-strided-store-u32-i2.c: Likewise.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.dg/vect/slp-11.c: Split into...
+	* gcc.dg/vect/slp-11a.c, gcc.dg/vect/slp-11b.c,
+	gcc.dg/vect/slp-11c.c: ...these tests.
+	* gcc.dg/vect/slp-12a.c: Split 4-stride loop into...
+	* gcc.dg/vect/slp-12c.c: ...this new test.
+	* gcc.dg/vect/slp-19.c: Split into...
+	* gcc.dg/vect/slp-19a.c, gcc.dg/vect/slp-19b.c,
+	gcc.dg/vect/slp-19c.c: ...these new tests.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* lib/target-supports.exp
+	(check_effective_target_vect_extract_even_odd_wide): Delete.
+	(check_effective_target_vect_strided_wide): Likewise.
+	* gcc.dg/vect/O3-pr39675-2.c: Use the non-wide versions instead.
+	* gcc.dg/vect/fast-math-pr35982.c: Likewise.
+	* gcc.dg/vect/fast-math-vect-complex-3.c: Likewise.
+	* gcc.dg/vect/pr37539.c: Likewise.
+	* gcc.dg/vect/slp-11.c: Likewise.
+	* gcc.dg/vect/slp-12a.c: Likewise.
+	* gcc.dg/vect/slp-12b.c: Likewise.
+	* gcc.dg/vect/slp-19.c: Likewise.
+	* gcc.dg/vect/slp-23.c: Likewise.
+	* gcc.dg/vect/vect-1.c: Likewise.
+	* gcc.dg/vect/vect-98.c: Likewise.
+	* gcc.dg/vect/vect-107.c: Likewise.
+	* gcc.dg/vect/vect-strided-float.c: Likewise.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-04-21  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.dg/vect/vect.exp: Run the main tests twice, one with -flto
+	and once without.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainlie:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/neon.md (vec_load_lanes<mode><mode>): New expanders,
+	(vec_store_lanes<mode><mode>): Likewise.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
+	* optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
+	convert_optab_index values.
+	(vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
+	* genopinit.c (optabs): Initialize the new optabs.
+	* internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
+	* internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
+	(expand_STORE_LANES): New functions.
+	* tree.h (build_array_type_nelts): Declare.
+	* tree.c (build_array_type_nelts): New function.
+	* tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
+	(vect_model_load_cost): Likewise.
+	(vect_store_lanes_supported, vect_load_lanes_supported)
+	(vect_record_strided_load_vectors): Declare.
+	* tree-vect-data-refs.c (vect_lanes_optab_supported_p)
+	(vect_store_lanes_supported, vect_load_lanes_supported): New functions.
+	(vect_transform_strided_load): Split out statement recording into...
+	(vect_record_strided_load_vectors): ...this new function.
+	* tree-vect-stmts.c (create_vector_array, read_vector_array)
+	(write_vector_array, create_array_ref): New functions.
+	(vect_model_store_cost): Add store_lanes_p argument.
+	(vect_model_load_cost): Add load_lanes_p argument.
+	(vectorizable_store): Try to use store-lanes functions for
+	interleaved stores.
+	(vectorizable_load): Likewise load-lanes and loads.
+	* tree-vect-slp.c (vect_get_and_check_slp_defs): Update call
+	to vect_model_store_cost.
+	(vect_build_slp_tree): Likewise vect_model_load_cost.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-20  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* tree-vect-stmts.c (vectorizable_store): Only chain one related
+	statement per copy.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* tree-inline.c (estimate_num_insns): Likewise.
+
+	Backport from mainline:
+
+	2011-04-20  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* Makefile.in (INTERNAL_FN_DEF, INTERNAL_FN_H): Define.
+	(GIMPLE_H): Include $(INTERNAL_FN_H).
+	(OBJS-common): Add internal-fn.o.
+	(internal-fn.o): New rule.
+	* internal-fn.def: New file.
+	* internal-fn.h: Likewise.
+	* internal-fn.c: Likewise.
+	* gimple.h: Include internal-fn.h.
+	(GF_CALL_INTERNAL): New gf_mask.
+	(gimple_statement_call): Put fntype into a union with a new
+	internal_fn field.
+	(gimple_build_call_internal): Declare.
+	(gimple_build_call_internal_vec): Likewise.
+	(gimple_call_same_target_p): Likewise.
+	(gimple_call_internal_p): New function.
+	(gimple_call_internal_fn): Likewise.
+	(gimple_call_set_fn): Assert that the function is not internal.
+	(gimple_call_set_fndecl): Likewise.
+	(gimple_call_set_internal_fn): New function.
+	(gimple_call_addr_fndecl): Handle null functions.
+	(gimple_call_return_type): Likewise.
+	[---- Plus backport adjustments:
+	(GF_CALL_INTERNAL_FN_SHIFT): New macro.
+	(GF_CALL_INTERNAL_FN): New gf_mask.
+	----]
+	* gimple.c (gimple_build_call_internal_1): New function.
+	(gimple_build_call_internal): Likewise.
+	(gimple_build_call_internal_vec): Likewise.
+	(gimple_call_same_target_p): Likewise.
+	(gimple_call_flags): Handle calls to internal functions.
+	(gimple_call_fnspec): New function.
+	(gimple_call_arg_flags, gimple_call_return_flags): Use it.
+	(gimple_has_side_effects): Handle null functions.
+	(gimple_rhs_has_side_effects): Likewise.
+	(gimple_call_copy_skip_args): Handle calls to internal functions.
+	* cfgexpand.c (expand_call_stmt): Likewise.
+	* expr.c (expand_expr_real_1): Assert that the call isn't internal.
+	* gimple-low.c (gimple_check_call_args): Handle calls to internal
+	functions.
+	* gimple-pretty-print.c (dump_gimple_call): Likewise.
+	* ipa-prop.c (ipa_analyze_call_uses): Handle null functions.
+	* tree-cfg.c (verify_gimple_call): Handle calls to internal functions.
+	(do_warn_unused_result): Likewise.
+	[---- Plus backport adjustments:
+	(verify_stmt): Likewise.
+	----]
+	* tree-eh.c (same_handler_p): Use gimple_call_same_target_p.
+	* tree-ssa-ccp.c (ccp_fold_stmt): Handle calls to internal functions.
+	[---- Plus backport adjustments:
+	(fold_gimple_call): Likewise.
+	----]
+	* tree-ssa-dom.c (hashable_expr): Use the gimple statement to record
+	the target of a call.
+	(initialize_hash_element): Update accordingly.
+	(hashable_expr_equal_p): Use gimple_call_same_target_p.
+	(iterative_hash_hashable_expr): Handle calls to internal functions.
+	(print_expr_hash_elt): Likewise.
+	* tree-ssa-pre.c (can_value_number_call): Likewise.
+	(eliminate): Handle null functions.
+	* tree-ssa-sccvn.c (visit_use): Handle calls to internal functions.
+	* tree-ssa-structalias.c (find_func_aliases): Likewise.
+	* value-prof.c (gimple_ic_transform): Likewise.
+	(gimple_indirect_call_to_profile): Likewise.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-14  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* tree-vectorizer.h (vect_strided_store_supported): Add a
+	HOST_WIDE_INT argument.
+	(vect_strided_load_supported): Likewise.
+	(vect_permute_store_chain): Return void.
+	(vect_transform_strided_load): Likewise.
+	(vect_permute_load_chain): Delete.
+	* tree-vect-data-refs.c (vect_strided_store_supported): Take a
+	count argument.  Check that the count is a power of two.
+	(vect_strided_load_supported): Likewise.
+	(vect_permute_store_chain): Return void.  Update after above changes.
+	Assert that the access is supported.
+	(vect_permute_load_chain): Likewise.
+	(vect_transform_strided_load): Return void.
+	* tree-vect-stmts.c (vectorizable_store): Update calls after
+	above interface changes.
+	(vectorizable_load): Likewise.
+	(vect_analyze_stmt): Don't check for strided powers of two here.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-14  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* tree-vectorizer.h (vect_create_data_ref_ptr): Add an extra
+	type parameter.
+	* tree-vect-data-refs.c (vect_create_data_ref_ptr): Add an aggr_type
+	parameter.  Generalise code to handle arrays as well as vectors.
+	(vect_setup_realignment): Update accordingly.
+	* tree-vect-stmts.c (vectorizable_store): Likewise.
+	(vectorizable_load): Likewise.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-14  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* tree-vect-stmts.c (vectorizable_load): Allocate and free dr_chain
+	within the per-copy loop.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-14  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* tree-vect-stmts.c (vectorizable_load): Print the number of copies
+	in the dump file.
+
+2001-06-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-03-25  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/arm.h (CANNOT_CHANGE_MODE_CLASS): Restrict FPA_REGS
+	case to VFPv1.
+
+2011-05-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (svn branches/gcc-4_6-branch 174261).
+
+2011-06-02  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	Backport from mainline:
+
+	2011-03-21  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	gcc/
+	* simplify-rtx.c (simplify_binary_operation_1): Handle
+	(xor (and A B) C) case when B and C are both constants.
+
+	gcc/testsuite/
+	* gcc.target/arm/xor-and.c: New.
+
+	2011-03-18  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	gcc/
+	* combine.c (try_combine): Do simplification only call of
+	subst() on i2 even when i1 is present. Update comments.
+
+	gcc/testsuite/
+	* gcc.target/arm/unsigned-extend-1.c: New.
+
+2011-05-16  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-05-16  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.05-0 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-05-13  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	* ddg.c (free_ddg_all_sccs): Free sccs field in struct ddg_all_sccs.
+	* modulo-sched.c (sms_schedule): Avoid unfreed memory when SMS fails.
+
+2011-05-13  Revital Eres  <revital.eres@linaro.org>
+
+	gcc/
+	* loop-doloop.c (doloop_condition_get): Support new form of
+	doloop pattern and use prev_nondebug_insn instead of PREV_INSN.
+	* config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*".
+	(doloop_end): New.
+	* config/arm/arm.md (*addsi3_compare0): Remove "*".
+	* params.def (sms-min-sc): New param flag.
+	* doc/invoke.texi (sms-min-sc): Document it.
+	* ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge
+	enters the branch create an anti edge in the opposite direction
+	to prevent the creation of reg-moves.
+	* modulo-sched.c: Adjust comment to reflect the fact we are
+	scheduling closing branch.
+	(PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine.
+	(stage_count): New field in struct partial_schedule.
+	(calculate_stage_count): New function.
+	(normalize_sched_times): Rename to reset_sched_times and handle
+	incrementing the sched time of the nodes by a constant value
+	passed as parameter.
+	(duplicate_insns_of_cycles): Skip closing branch.
+	(sms_schedule_by_order): Schedule closing branch.
+	(ps_insn_find_column): Handle closing branch.
+	(sms_schedule): Call reset_sched_times and adjust the code to
+	support scheduling of the closing branch. Use sms-min-sc.
+	Support new form of doloop pattern.
+	(ps_insert_empty_row): Update calls to normalize_sched_times
+	and rotate_partial_schedule functions.
+
+2011-05-12  Michael Hope  <michael.hope@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-05-05  Michael Hope  <michael.hope@linaro.org>
+
+	PR pch/45979
+	* config/host-linux.c (TRY_EMPTY_VM_SPACE): Define for
+	__ARM_EABI__ hosts.
+
+2011-05-06  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (svn branches/gcc-4_6-branch 173480).
+
+2011-05-06  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	From Sergey Grechanik  <mouseentity@ispras.ru>, approved for mainline
+
+	* config/arm/arm.c (coproc_secondary_reload_class): Return NO_REGS
+	for constant vectors.
+
+2011-04-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-04-05  Tom de Vries  <tom@codesourcery.com>
+
+	PR target/43920
+	gcc/
+	* config/arm/arm.h (BRANCH_COST): Set to 1 for Thumb-2 when optimizing
+	for size.
+
+2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/testsuite/
+	From  Richard Earnshaw  <rearnsha@arm.com>
+
+	PR target/46329
+	* gcc.target/arm/pr46329.c: New test.
+
+	gcc/
+	PR target/46329
+	* config/arm/arm.c (arm_legitimate_constant_p_1): Return false
+	for all Neon struct constants.
+
+2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* targhooks.h (default_legitimate_constant_p); Declare.
+	* targhooks.c (default_legitimate_constant_p): New function.
+
+	Backport from mainline:
+	2011-04-21  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* target.def (legitimate_constant_p): New hook.
+	* doc/tm.texi.in (LEGITIMATE_CONSTANT_P): Replace with...
+	(TARGET_LEGITIMATE_CONSTANT_P): ...this.
+	* doc/tm.texi: Regenerate.
+	* calls.c (precompute_register_parameters): Replace uses of
+	LEGITIMATE_CONSTANT_P with targetm.legitimate_constant_p.
+	(emit_library_call_value_1): Likewise.
+	* expr.c (move_block_to_reg, can_store_by_pieces, emit_move_insn)
+	(compress_float_constant, emit_push_insn, expand_expr_real_1): Likewise.
+	* recog.c (general_operand, immediate_operand): Likewise.
+	* reload.c (find_reloads_toplev, find_reloads_address_part): Likewise.
+	* reload1.c (init_eliminable_invariants): Likewise.
+
+	* config/arm/arm-protos.h (arm_cannot_force_const_mem): Delete.
+	* config/arm/arm.h (ARM_LEGITIMATE_CONSTANT_P): Likewise.
+	(THUMB_LEGITIMATE_CONSTANT_P, LEGITIMATE_CONSTANT_P): Likewise.
+	* config/arm/arm.c (TARGET_LEGITIMATE_CONSTANT_P): Define.
+	(arm_legitimate_constant_p_1, thumb_legitimate_constant_p)
+	(arm_legitimate_constant_p): New functions.
+	(arm_cannot_force_const_mem): Make static.
+
+2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* hooks.h (hook_bool_mode_uhwi_false): Declare.
+	* hooks.c (hook_bool_mode_uhwi_false): New function.
+	* target.def (array_mode_supported_p): New hook.
+	* doc/tm.texi.in (TARGET_ARRAY_MODE_SUPPORTED_P): Add @hook.
+	* doc/tm.texi: Regenerate.
+	* stor-layout.c (mode_for_array): New function.
+	(layout_type): Use it.
+	* config/arm/arm.c (arm_array_mode_supported_p): New function.
+	(TARGET_ARRAY_MODE_SUPPORTED_P): Define.
+
+2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-04-12  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* config/arm/arm.c (arm_print_operand): Use MEM_SIZE to get the
+	size of a '%A' memory reference.
+	(T_DREG, T_QREG): New neon_builtin_type_bits.
+	(arm_init_neon_builtins): Assert that the load and store operands
+	are neon_struct_operands.
+	(locate_neon_builtin_icode): Provide the neon_builtin_type_bits.
+	(NEON_ARG_MEMORY): New builtin_arg.
+	(neon_dereference_pointer): New function.
+	(arm_expand_neon_args): Add a neon_builtin_type_bits argument.
+	Handle NEON_ARG_MEMORY.
+	(arm_expand_neon_builtin): Update after above interface changes.
+	Use NEON_ARG_MEMORY for loads and stores.
+	* config/arm/predicates.md (neon_struct_operand): New predicate.
+	* config/arm/iterators.md (V_two_elem): Tweak formatting.
+	(V_three_elem): Use BLKmode for accesses that have no associated mode.
+	(V_four_elem): Tweak formatting.
+	* config/arm/neon.md (neon_vld1<mode>, neon_vld1_dup<mode>)
+	(neon_vst1_lane<mode>, neon_vst1<mode>, neon_vld2<mode>)
+	(neon_vld2_lane<mode>, neon_vld2_dup<mode>, neon_vst2<mode>)
+	(neon_vst2_lane<mode>, neon_vld3<mode>, neon_vld3_lane<mode>)
+	(neon_vld3_dup<mode>, neon_vst3<mode>, neon_vst3_lane<mode>)
+	(neon_vld4<mode>, neon_vld4_lane<mode>, neon_vld4_dup<mode>)
+	(neon_vst4<mode>): Replace pointer operand with a memory operand.
+	Use %A in the output template.
+	(neon_vld3qa<mode>, neon_vld3qb<mode>, neon_vst3qa<mode>)
+	(neon_vst3qb<mode>, neon_vld4qa<mode>, neon_vld4qb<mode>)
+	(neon_vst4qa<mode>, neon_vst4qb<mode>): Likewise, but halve
+	the width of the memory access.  Remove post-increment.
+	* config/arm/neon-testgen.ml: Allow addresses to have an alignment.
+
+	gcc/testsuite/
+	Backport from mainline:
+
+	2011-04-12  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* gcc.target/arm/neon-vld3-1.c: New test.
+	* gcc.target/arm/neon-vst3-1.c: New test.
+	* gcc.target/arm/neon/v*.c: Regenerate.
+
+2011-05-03  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	Backport from mainline:
+
+	2011-03-30  Richard Sandiford  <richard.sandiford@linaro.org>
+		    Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	PR target/43590
+	* config/arm/neon.md (neon_vld3qa<mode>, neon_vld4qa<mode>): Remove
+	operand 1 and reshuffle the operands to match.
+	(neon_vld3<mode>, neon_vld4<mode>): Update accordingly.
+
+2011-05-04  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	Backport from mainline:
+
+	2011-03-29  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR debug/48190
+	* dwarf2out.c (dw_loc_list_node): Add resolved_addr and replaced.
+	(cached_dw_loc_list_def): New structure.
+	(cached_dw_loc_list): New typedef.
+	(cached_dw_loc_list_table): New variable.
+	(cached_dw_loc_list_table_hash): New function.
+	(cached_dw_loc_list_table_eq): Likewise.
+	(add_location_or_const_value_attribute): Take a bool cache_p.
+	Cache the list when the parameter is true.
+	(gen_formal_parameter_die): Update caller.
+	(gen_variable_die): Likewise.
+	(dwarf2out_finish): Likewise.
+	(dwarf2out_abstract_function): Nullify cached_dw_loc_list_table
+	while generating debug info for the decl.
+	(dwarf2out_function_decl): Clear cached_dw_loc_list_table.
+	(dwarf2out_init): Initialize cached_dw_loc_list_table.
+	(resolve_addr): Cache the result of resolving a chain of
+	location lists.
+
+2011-04-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-04-15  Maxim Kuvyrkov  <maxim@codesourcery.com>
+
+	gcc/
+	* combine.c (subst, combine_simlify_rtx): Add new argument, use it
+	to track processing of conditionals.  Update all callers.
+	(try_combine, simplify_if_then_else): Update.
+
+	2011-04-25  Maxim Kuvyrkov  <maxim@codesourcery.com>
+		    Eric Botcazou <ebotcazou@adacore.com>
+
+	gcc/
+	* combine.c (combine_simplify_rtx): Avoid mis-simplifying conditionals
+	for STORE_FLAG_VALUE==-1 case.
+
+2011-05-02  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from FSF:
+
+	2011-03-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* config/arm/arm.c (arm_autovectorize_vector_sizes): New function.
+	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
+
+	gcc/testsuite/
+	* gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
+	accesses to preserve the meaning of the test for doubleword vectors.
+	* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
+	* gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
+
+2011-04-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	Backport from FSF:
+
+	2011-04-03  Richard Guenther  <rguenther@suse.de>
+		    Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* tree-if-conv.c (memrefs_read_or_written_unconditionally): Strip all
+	non-variable offsets and compare the remaining bases of the two
+	accesses instead of looking for exact same data-ref.
+
+	gcc/testsuite/
+	* gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c: New test.
+	* gcc.dg/vect/vect.exp: Run if-cvt-stores-vect* tests with
+	-ftree-loop-if-convert-stores.
+
+2011-04-21  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2008-12-03  Daniel Jacobowitz  <dan@codesourcery.com>
+
+	gcc/testsuite/
+	* gcc.dg/vect/vect-shift-3.c, gcc.dg/vect/vect-shift-4.c: New.
+	* lib/target-supports.exp (check_effective_target_vect_shift_char): New
+	function.
+
+2011-04-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-04-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.04-0 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-04-13  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	Backport from FSF:
+
+	2011-04-12  Ramana Radhakrishnan  <ramana.radhakrishnan@linaro.org>
+
+	gcc/
+	* config/arm/arm.md (*arm_negdi2): Fix early clobber constraints.
+
+2011-03-27  Ira Rosen  <ira.rosen@linaro.org>
+
+	gcc/
+	* doc/invoke.texi (max-stores-to-sink): Document.
+	* params.h (MAX_STORES_TO_SINK): Define.
+	* opts.c (finish_options): Set MAX_STORES_TO_SINK to 0
+	if either vectorization or if-conversion is disabled.
+	* tree-data-ref.c (dr_equal_offsets_p1): Moved and renamed from
+	tree-vect-data-refs.c vect_equal_offsets.
+	(dr_equal_offsets_p): New function.
+	(find_data_references_in_bb): Remove static.
+	* tree-data-ref.h (find_data_references_in_bb): Declare.
+	(dr_equal_offsets_p): Likewise.
+	* tree-vect-data-refs.c (vect_equal_offsets): Move to tree-data-ref.c.
+	(vect_drs_dependent_in_basic_block): Update calls to
+	vect_equal_offsets.
+	(vect_check_interleaving): Likewise.
+	* tree-ssa-phiopt.c: Include cfgloop.h and tree-data-ref.h.
+	(cond_if_else_store_replacement): Rename to...
+	(cond_if_else_store_replacement_1): ... this.  Change arguments and
+	documentation.
+	(cond_if_else_store_replacement): New function.
+	* Makefile.in (tree-ssa-phiopt.o): Adjust dependencies.
+	* params.def (PARAM_MAX_STORES_TO_SINK): Define.
+
+	gcc/testsuite/
+	* gcc.dg/vect/vect-cselim-1.c: New test.
+	* gcc.dg/vect/vect-cselim-2.c: New test.
+
+2011-04-07  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (svn branches/gcc-4_6-branch 171921).
+
+2011-03-23  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-03-23  Julian Brown  <julian@codesourcery.com>
+
+	gcc/
+	* expr.c (expand_expr_real_1): Only use BLKmode for volatile
+	accesses which are not naturally aligned.
+
+2011-03-26  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (svn branches/gcc-4_6-branch 171336).
+
+2011-03-22  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from FSF:
+
+	2011-03-21  Daniel Jacobowitz  <dan@codesourcery.com>
+
+	gcc/
+	* config/arm/unwind-arm.c (__gnu_unwind_pr_common): Correct test
+	for barrier handlers.
+
+2011-03-13  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (trunk svn 170846).
+
+2011-03-10  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (trunk svn 170669).
+
+2011-03-03  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* LINARO-VERSION: Bump version.
+
+2011-03-03  Andrew Stubbs  <ams@codesourcery.com>
+
+	GCC Linaro 4.6-2011.03-0 released.
+
+	gcc/
+	* LINARO-VERSION: Update.
+
+2011-02-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	* config/arm/predicates.md (neon_lane_number): Accept 0..15.
+
+	gcc/testsuite/
+	* gcc.target/arm/neon-vld-1.c: New test.
+
+2011-02-02  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	gcc/
+	PR target/47551
+	* config/arm/arm.c (coproc_secondary_reload_class): Handle
+	structure modes.  Don't check neon_vector_mem_operand for
+	vector or structure modes.
+
+	gcc/testsuite/
+	PR target/47551
+	* gcc.target/arm/neon-modes-2.c: New test.
+
+2011-02-28  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (trunk svn 170492).
+
+2011-02-25  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merge from FSF GCC 4.6 (trunk svn 170356).
+
+2011-02-21  Andrew Stubbs  <ams@codesourcery.com>
+	    Julian Brown  <julian@codesourcery.com>
+	    Mark Shinwell  <shinwell@codesourcery.com>
+
+	Forward-ported from Linaro GCC 4.5 (bzr99324).
+
+	gcc/
+	* config/arm/arm.h (arm_class_likely_spilled_p): Check against
+	LO_REGS only for Thumb-1.
+	(MODE_BASE_REG_CLASS): Restrict base registers to those which can
+	be used in short instructions when optimising for size on Thumb-2.
+
+2011-02-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merged from Linaro GCC 4.5 (bzr99409).
+
+	2010-09-30  Jie Zhang  <jie@codesourcery.com>
+
+	gcc/testsuite/
+	* gcc.target/arm/neon-thumb2-move.c: Add
+	dg-require-effective-target arm_thumb2_ok.
+
+2011-02-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	Merged from Linaro GCC 4.5 (bzr99325).
+
+	2006-04-21  Kazu Hirata  <kazu@codesourcery.com>
+
+	gcc/testsuite/
+	* gcc.target/arm/vfp-ldmdbd.c, gcc.target/arm/vfp-ldmdbs.c,
+	gcc.target/arm/vfp-ldmiad.c, gcc.target/arm/vfp-ldmias.c,
+	gcc.target/arm/vfp-stmdbd.c, gcc.target/arm/vfp-stmdbs.c,
+	gcc.target/arm/vfp-stmiad.c, gcc.target/arm/vfp-stmias.c: New.
+
+2011-02-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	gcc/
+	* configure: Regenerate.
+	* configure.ac (PKGVERSION): Set default to a custom
+	Linaro string.
+	* LINARO-VERSION: New file.
+
+Imported GCC from FSF trunk SVN revision 170067.
--- a/src/gcc/builtins.c
+++ b/src/gcc/builtins.c
@@ -264,7 +264,14 @@
 }
 
 /* Return the alignment in bits of EXP, an object.
-   Don't return more than MAX_ALIGN no matter what.  */
+   Don't return more than MAX_ALIGN no matter what.
+
+   Note that the address (and thus the alignment) computed here is based
+   on the address to which a symbol resolves, whereas DECL_ALIGN is based
+   on the address at which an object is actually located.  These two
+   addresses are not always the same.  For example, on ARM targets,
+   the address &foo of a Thumb function foo() has the lowest bit set,
+   whereas foo() itself starts on an even address.  */
 
 unsigned int
 get_object_alignment (tree exp, unsigned int max_align)
@@ -286,7 +293,21 @@
     exp = DECL_INITIAL (exp);
   if (DECL_P (exp)
       && TREE_CODE (exp) != LABEL_DECL)
-    align = DECL_ALIGN (exp);
+    {
+      if (TREE_CODE (exp) == FUNCTION_DECL)
+	{
+	  /* Function addresses can encode extra information besides their
+	     alignment.  However, if TARGET_PTRMEMFUNC_VBIT_LOCATION
+	     allows the low bit to be used as a virtual bit, we know
+	     that the address itself must be 2-byte aligned.  */
+	  if (TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn)
+	    align = 2 * BITS_PER_UNIT;
+	  else
+	    align = BITS_PER_UNIT;
+	}
+      else
+	align = DECL_ALIGN (exp);
+    }
   else if (CONSTANT_CLASS_P (exp))
     {
       align = TYPE_ALIGN (TREE_TYPE (exp));
--- a/src/gcc/calls.c
+++ b/src/gcc/calls.c
@@ -686,7 +686,7 @@
 	/* If the value is a non-legitimate constant, force it into a
 	   pseudo now.  TLS symbols sometimes need a call to resolve.  */
 	if (CONSTANT_P (args[i].value)
-	    && !LEGITIMATE_CONSTANT_P (args[i].value))
+	    && !targetm.legitimate_constant_p (args[i].mode, args[i].value))
 	  args[i].value = force_reg (args[i].mode, args[i].value);
 
 	/* If we are to promote the function arg to a wider mode,
@@ -3578,7 +3578,8 @@
 
       /* Make sure it is a reasonable operand for a move or push insn.  */
       if (!REG_P (addr) && !MEM_P (addr)
-	  && ! (CONSTANT_P (addr) && LEGITIMATE_CONSTANT_P (addr)))
+	  && !(CONSTANT_P (addr)
+	       && targetm.legitimate_constant_p (Pmode, addr)))
 	addr = force_operand (addr, NULL_RTX);
 
       argvec[count].value = addr;
@@ -3619,7 +3620,7 @@
 
       /* Make sure it is a reasonable operand for a move or push insn.  */
       if (!REG_P (val) && !MEM_P (val)
-	  && ! (CONSTANT_P (val) && LEGITIMATE_CONSTANT_P (val)))
+	  && !(CONSTANT_P (val) && targetm.legitimate_constant_p (mode, val)))
 	val = force_operand (val, NULL_RTX);
 
       if (pass_by_reference (&args_so_far, mode, NULL_TREE, 1))
--- a/src/gcc/cfgexpand.c
+++ b/src/gcc/cfgexpand.c
@@ -1843,12 +1843,17 @@
 static void
 expand_call_stmt (gimple stmt)
 {
-  tree exp;
-  tree lhs = gimple_call_lhs (stmt);
+  tree exp, lhs;
   size_t i;
   bool builtin_p;
   tree decl;
 
+  if (gimple_call_internal_p (stmt))
+    {
+      expand_internal_call (stmt);
+      return;
+    }
+
   exp = build_vl_exp (CALL_EXPR, gimple_call_num_args (stmt) + 3);
 
   CALL_EXPR_FN (exp) = gimple_call_fn (stmt);
@@ -1886,6 +1891,7 @@
   SET_EXPR_LOCATION (exp, gimple_location (stmt));
   TREE_BLOCK (exp) = gimple_block (stmt);
 
+  lhs = gimple_call_lhs (stmt);
   if (lhs)
     expand_assignment (lhs, exp, false);
   else
@@ -3209,6 +3215,8 @@
     case VEC_UNPACK_LO_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
       return NULL;
 
    /* Misc codes.  */
--- a/src/gcc/combine.c
+++ b/src/gcc/combine.c
@@ -391,8 +391,8 @@
 static void undo_all (void);
 static void undo_commit (void);
 static rtx *find_split_point (rtx *, rtx, bool);
-static rtx subst (rtx, rtx, rtx, int, int);
-static rtx combine_simplify_rtx (rtx, enum machine_mode, int);
+static rtx subst (rtx, rtx, rtx, int, int, int);
+static rtx combine_simplify_rtx (rtx, enum machine_mode, int, int);
 static rtx simplify_if_then_else (rtx);
 static rtx simplify_set (rtx);
 static rtx simplify_logical (rtx);
@@ -3130,7 +3130,7 @@
       /* It is possible that the source of I2 or I1 may be performing
 	 an unneeded operation, such as a ZERO_EXTEND of something
 	 that is known to have the high part zero.  Handle that case
-	 by letting subst look at the innermost one of them.
+	 by letting subst look at the inner insns.
 
 	 Another way to do this would be to have a function that tries
 	 to simplify a single insn instead of merging two or more
@@ -3153,13 +3153,11 @@
 	  if (i1)
 	    {
 	      subst_low_luid = DF_INSN_LUID (i1);
-	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0);
-	    }
-	  else
-	    {
-	      subst_low_luid = DF_INSN_LUID (i2);
-	      i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0);
+	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
 	    }
+
+	  subst_low_luid = DF_INSN_LUID (i2);
+	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
 	}
 
       n_occurrences = 0;		/* `subst' counts here */
@@ -3170,7 +3168,7 @@
 	 self-referential RTL when we will be substituting I1SRC for I1DEST
 	 later.  Likewise if I0 feeds into I2, either directly or indirectly
 	 through I1, and I0DEST is in I0SRC.  */
-      newpat = subst (PATTERN (i3), i2dest, i2src, 0,
+      newpat = subst (PATTERN (i3), i2dest, i2src, 0, 0,
 		      (i1_feeds_i2_n && i1dest_in_i1src)
 		      || ((i0_feeds_i2_n || (i0_feeds_i1_n && i1_feeds_i2_n))
 			  && i0dest_in_i0src));
@@ -3214,7 +3212,7 @@
 	 copy of I1SRC each time we substitute it, in order to avoid creating
 	 self-referential RTL when we will be substituting I0SRC for I0DEST
 	 later.  */
-      newpat = subst (newpat, i1dest, i1src, 0,
+      newpat = subst (newpat, i1dest, i1src, 0, 0,
 		      i0_feeds_i1_n && i0dest_in_i0src);
       substed_i1 = 1;
 
@@ -3248,7 +3246,7 @@
 
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
-      newpat = subst (newpat, i0dest, i0src, 0, 0);
+      newpat = subst (newpat, i0dest, i0src, 0, 0, 0);
       substed_i0 = 1;
     }
 
@@ -3310,7 +3308,7 @@
 	{
 	  rtx t = i1pat;
 	  if (i0_feeds_i1_n)
-	    t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0);
+	    t = subst (t, i0dest, i0src_copy ? i0src_copy : i0src, 0, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
 	}
@@ -3318,10 +3316,10 @@
 	{
 	  rtx t = i2pat;
 	  if (i1_feeds_i2_n)
-	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0, 0,
 		       i0_feeds_i1_n && i0dest_in_i0src);
 	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
-	    t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0);
+	    t = subst (t, i0dest, i0src_copy2 ? i0src_copy2 : i0src, 0, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
 	}
@@ -4998,11 +4996,13 @@
 
    IN_DEST is nonzero if we are processing the SET_DEST of a SET.
 
+   IN_COND is nonzero if we are on top level of the condition.
+
    UNIQUE_COPY is nonzero if each substitution must be unique.  We do this
    by copying if `n_occurrences' is nonzero.  */
 
 static rtx
-subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy)
+subst (rtx x, rtx from, rtx to, int in_dest, int in_cond, int unique_copy)
 {
   enum rtx_code code = GET_CODE (x);
   enum machine_mode op0_mode = VOIDmode;
@@ -5063,7 +5063,7 @@
       && GET_CODE (XVECEXP (x, 0, 0)) == SET
       && GET_CODE (SET_SRC (XVECEXP (x, 0, 0))) == ASM_OPERANDS)
     {
-      new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, unique_copy);
+      new_rtx = subst (XVECEXP (x, 0, 0), from, to, 0, 0, unique_copy);
 
       /* If this substitution failed, this whole thing fails.  */
       if (GET_CODE (new_rtx) == CLOBBER
@@ -5080,7 +5080,7 @@
 	      && GET_CODE (dest) != CC0
 	      && GET_CODE (dest) != PC)
 	    {
-	      new_rtx = subst (dest, from, to, 0, unique_copy);
+	      new_rtx = subst (dest, from, to, 0, 0, unique_copy);
 
 	      /* If this substitution failed, this whole thing fails.  */
 	      if (GET_CODE (new_rtx) == CLOBBER
@@ -5126,8 +5126,8 @@
 		    }
 		  else
 		    {
-		      new_rtx = subst (XVECEXP (x, i, j), from, to, 0,
-				   unique_copy);
+		      new_rtx = subst (XVECEXP (x, i, j), from, to, 0, 0,
+				       unique_copy);
 
 		      /* If this substitution failed, this whole thing
 			 fails.  */
@@ -5204,7 +5204,9 @@
 				&& (code == SUBREG || code == STRICT_LOW_PART
 				    || code == ZERO_EXTRACT))
 			       || code == SET)
-			      && i == 0), unique_copy);
+			      && i == 0),
+				 code == IF_THEN_ELSE && i == 0,
+				 unique_copy);
 
 	      /* If we found that we will have to reject this combination,
 		 indicate that by returning the CLOBBER ourselves, rather than
@@ -5261,7 +5263,7 @@
       /* If X is sufficiently simple, don't bother trying to do anything
 	 with it.  */
       if (code != CONST_INT && code != REG && code != CLOBBER)
-	x = combine_simplify_rtx (x, op0_mode, in_dest);
+	x = combine_simplify_rtx (x, op0_mode, in_dest, in_cond);
 
       if (GET_CODE (x) == code)
 	break;
@@ -5281,10 +5283,12 @@
    expression.
 
    OP0_MODE is the original mode of XEXP (x, 0).  IN_DEST is nonzero
-   if we are inside a SET_DEST.  */
+   if we are inside a SET_DEST.  IN_COND is nonzero if we are on the top level
+   of a condition.  */
 
 static rtx
-combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
+combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest,
+		      int in_cond)
 {
   enum rtx_code code = GET_CODE (x);
   enum machine_mode mode = GET_MODE (x);
@@ -5339,8 +5343,8 @@
 	     false arms to store-flag values.  Be careful to use copy_rtx
 	     here since true_rtx or false_rtx might share RTL with x as a
 	     result of the if_then_else_cond call above.  */
-	  true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0);
-	  false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0);
+	  true_rtx = subst (copy_rtx (true_rtx), pc_rtx, pc_rtx, 0, 0, 0);
+	  false_rtx = subst (copy_rtx (false_rtx), pc_rtx, pc_rtx, 0, 0, 0);
 
 	  /* If true_rtx and false_rtx are not general_operands, an if_then_else
 	     is unlikely to be simpler.  */
@@ -5684,7 +5688,7 @@
 	{
 	  /* Try to simplify the expression further.  */
 	  rtx tor = simplify_gen_binary (IOR, mode, XEXP (x, 0), XEXP (x, 1));
-	  temp = combine_simplify_rtx (tor, VOIDmode, in_dest);
+	  temp = combine_simplify_rtx (tor, VOIDmode, in_dest, 0);
 
 	  /* If we could, great.  If not, do not go ahead with the IOR
 	     replacement, since PLUS appears in many special purpose
@@ -5777,7 +5781,16 @@
 	     ZERO_EXTRACT is indeed appropriate, it will be placed back by
 	     the call to make_compound_operation in the SET case.  */
 
-	  if (STORE_FLAG_VALUE == 1
+	  if (in_cond)
+	    /* Don't apply below optimizations if the caller would
+	       prefer a comparison rather than a value.
+	       E.g., for the condition in an IF_THEN_ELSE most targets need
+	       an explicit comparison.  */
+	    {
+	      ;
+	    }
+
+	  else if (STORE_FLAG_VALUE == 1
 	      && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
 	      && op1 == const0_rtx
 	      && mode == GET_MODE (op0)
@@ -5823,7 +5836,10 @@
 
 	  /* If STORE_FLAG_VALUE is -1, we have cases similar to
 	     those above.  */
-	  if (STORE_FLAG_VALUE == -1
+	  if (in_cond)
+	    ;
+
+	  else if (STORE_FLAG_VALUE == -1
 	      && new_code == NE && GET_MODE_CLASS (mode) == MODE_INT
 	      && op1 == const0_rtx
 	      && (num_sign_bit_copies (op0, mode)
@@ -6021,11 +6037,11 @@
       if (reg_mentioned_p (from, true_rtx))
 	true_rtx = subst (known_cond (copy_rtx (true_rtx), true_code,
 				      from, true_val),
-		      pc_rtx, pc_rtx, 0, 0);
+			  pc_rtx, pc_rtx, 0, 0, 0);
       if (reg_mentioned_p (from, false_rtx))
 	false_rtx = subst (known_cond (copy_rtx (false_rtx), false_code,
 				   from, false_val),
-		       pc_rtx, pc_rtx, 0, 0);
+			   pc_rtx, pc_rtx, 0, 0, 0);
 
       SUBST (XEXP (x, 1), swapped ? false_rtx : true_rtx);
       SUBST (XEXP (x, 2), swapped ? true_rtx : false_rtx);
@@ -6242,11 +6258,11 @@
 	{
 	  temp = subst (simplify_gen_relational (true_code, m, VOIDmode,
 						 cond_op0, cond_op1),
-			pc_rtx, pc_rtx, 0, 0);
+			pc_rtx, pc_rtx, 0, 0, 0);
 	  temp = simplify_gen_binary (MULT, m, temp,
 				      simplify_gen_binary (MULT, m, c1,
 							   const_true_rtx));
-	  temp = subst (temp, pc_rtx, pc_rtx, 0, 0);
+	  temp = subst (temp, pc_rtx, pc_rtx, 0, 0, 0);
 	  temp = simplify_gen_binary (op, m, gen_lowpart (m, z), temp);
 
 	  if (extend_op != UNKNOWN)
@@ -6326,10 +6342,18 @@
       enum rtx_code new_code;
       rtx op0, op1, tmp;
       int other_changed = 0;
+      rtx inner_compare = NULL_RTX;
       enum machine_mode compare_mode = GET_MODE (dest);
 
       if (GET_CODE (src) == COMPARE)
-	op0 = XEXP (src, 0), op1 = XEXP (src, 1);
+	{
+	  op0 = XEXP (src, 0), op1 = XEXP (src, 1);
+	  if (GET_CODE (op0) == COMPARE && op1 == const0_rtx)
+	    {
+	      inner_compare = op0;
+	      op0 = XEXP (inner_compare, 0), op1 = XEXP (inner_compare, 1);
+	    }
+	}
       else
 	op0 = src, op1 = CONST0_RTX (GET_MODE (src));
 
@@ -6371,6 +6395,12 @@
 	 need to use a different CC mode here.  */
       if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
 	compare_mode = GET_MODE (op0);
+      else if (inner_compare
+	       && GET_MODE_CLASS (GET_MODE (inner_compare)) == MODE_CC
+	       && new_code == old_code
+	       && op0 == XEXP (inner_compare, 0)
+	       && op1 == XEXP (inner_compare, 1))
+	compare_mode = GET_MODE (inner_compare);
       else
 	compare_mode = SELECT_CC_MODE (new_code, op0, op1);
 
--- a/src/gcc/common.opt
+++ b/src/gcc/common.opt
@@ -1617,6 +1617,19 @@
 Common Report Var(flag_sched_pressure) Init(0) Optimization
 Enable register pressure sensitive insn scheduling
 
+fsched-pressure-algorithm=
+Common Joined RejectNegative Enum(sched_pressure_algorithm) Var(flag_sched_pressure_algorithm) Init(SCHED_PRESSURE_WEIGHTED)
+-fira-algorithm=[CB|priority] Set the used IRA algorithm
+
+Enum
+Name(sched_pressure_algorithm) Type(enum sched_pressure_algorithm) UnknownError(unknown %<fsched-pressure%> algorithm %qs)
+
+EnumValue
+Enum(sched_pressure_algorithm) String(weighted) Value(SCHED_PRESSURE_WEIGHTED)
+
+EnumValue
+Enum(sched_pressure_algorithm) String(model) Value(SCHED_PRESSURE_MODEL)
+
 fsched-spec
 Common Report Var(flag_schedule_speculative) Init(1) Optimization
 Allow speculative motion of non-loads
--- a/src/gcc/config/arm/arm.c
+++ b/src/gcc/config/arm/arm.c
@@ -63,6 +63,11 @@
 
 void (*arm_lang_output_object_attributes_hook)(void);
 
+struct four_ints
+{
+  int i[4];
+};
+
 /* Forward function declarations.  */
 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
 static int arm_compute_static_chain_stack_bytes (void);
@@ -81,7 +86,6 @@
 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
 static int thumb_far_jump_used_p (void);
 static bool thumb_force_lr_save (void);
-static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 static rtx emit_sfm (int, int);
 static unsigned arm_size_return_regs (void);
 static bool arm_assemble_integer (rtx, unsigned int, int);
@@ -129,7 +133,13 @@
 static int arm_comp_type_attributes (const_tree, const_tree);
 static void arm_set_default_type_attributes (tree);
 static int arm_adjust_cost (rtx, rtx, rtx, int);
-static int count_insns_for_constant (HOST_WIDE_INT, int);
+static int optimal_immediate_sequence (enum rtx_code code,
+				       unsigned HOST_WIDE_INT val,
+				       struct four_ints *return_sequence);
+static int optimal_immediate_sequence_1 (enum rtx_code code,
+					 unsigned HOST_WIDE_INT val,
+					 struct four_ints *return_sequence,
+					 int i);
 static int arm_get_strip_length (int);
 static bool arm_function_ok_for_sibcall (tree, tree);
 static enum machine_mode arm_promote_function_mode (const_tree,
@@ -143,6 +153,8 @@
 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
 				 tree);
 static bool arm_have_conditional_execution (void);
+static bool arm_cannot_force_const_mem (rtx);
+static bool arm_legitimate_constant_p (enum machine_mode, rtx);
 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
@@ -160,6 +172,7 @@
 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static tree arm_builtin_decl (unsigned, bool);
 static void emit_constant_insn (rtx cond, rtx pattern);
 static rtx emit_set_insn (rtx, rtx);
 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
@@ -241,6 +254,8 @@
 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
+static bool arm_array_mode_supported_p (enum machine_mode,
+					unsigned HOST_WIDE_INT);
 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
 static bool arm_class_likely_spilled_p (reg_class_t);
 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
@@ -251,6 +266,9 @@
 						     bool is_packed);
 static void arm_conditional_register_usage (void);
 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static unsigned int arm_autovectorize_vector_sizes (void);
+static int arm_default_branch_cost (bool, bool);
+static int arm_cortex_a5_branch_cost (bool, bool);
 
 
 /* Table of machine attributes.  */
@@ -294,6 +312,11 @@
 /* Set default optimization options.  */
 static const struct default_options arm_option_optimization_table[] =
   {
+    /* Enable -fsched-pressure using -fsched-pressure-algorithm=model
+       by default when optimizing.  */
+    { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+    { OPT_LEVELS_1_PLUS, OPT_fsched_pressure_algorithm_,
+      NULL, SCHED_PRESSURE_MODEL },
     /* Enable section anchors by default at -O1 or higher.  */
     { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
     { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
@@ -394,8 +417,13 @@
 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
+#undef TARGET_ARRAY_MODE_SUPPORTED_P
+#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+  arm_autovectorize_vector_sizes
 
 #undef  TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
@@ -404,6 +432,8 @@
 #define TARGET_INIT_BUILTINS  arm_init_builtins
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
+#undef  TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL arm_builtin_decl
 
 #undef TARGET_INIT_LIBFUNCS
 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
@@ -520,6 +550,9 @@
 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
 
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
+
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
 
@@ -663,12 +696,13 @@
 #define FL_THUMB2     (1 << 16)	      /* Thumb-2.  */
 #define FL_NOTM	      (1 << 17)	      /* Instructions not present in the 'M'
 					 profile.  */
-#define FL_DIV	      (1 << 18)	      /* Hardware divide.  */
+#define FL_THUMB_DIV  (1 << 18)	      /* Hardware divide (Thumb mode).  */
 #define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
 #define FL_NEON       (1 << 20)       /* Neon instructions.  */
 #define FL_ARCH7EM    (1 << 21)	      /* Instructions present in the ARMv7E-M
 					 architecture.  */
 #define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
+#define FL_ARM_DIV    (1 << 23)	      /* Hardware divide (ARM mode).  */
 
 #define FL_IWMMXT     (1 << 29)	      /* XScale v2 or "Intel Wireless MMX technology".  */
 
@@ -695,8 +729,8 @@
 #define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
 #define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
 #define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
-#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
-#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
+#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
+#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
 #define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
 
 /* The bits in this mask specify which
@@ -782,7 +816,8 @@
 int arm_arch_thumb2;
 
 /* Nonzero if chip supports integer division instruction.  */
-int arm_arch_hwdiv;
+int arm_arch_arm_hwdiv;
+int arm_arch_thumb_hwdiv;
 
 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
    we must report the mode of the memory reference from
@@ -855,48 +890,117 @@
 {
   arm_slowmul_rtx_costs,
   NULL,
-  3,
-  ARM_PREFETCH_NOT_BENEFICIAL
+  3,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
 };
 
 const struct tune_params arm_fastmul_tune =
 {
   arm_fastmul_rtx_costs,
   NULL,
-  1,
-  ARM_PREFETCH_NOT_BENEFICIAL
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
+};
+
+/* StrongARM has early execution of branches, so a sequence that is worth
+   skipping is shorter.  Set max_insns_skipped to a lower value.  */
+
+const struct tune_params arm_strongarm_tune =
+{
+  arm_fastmul_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  3,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
 };
 
 const struct tune_params arm_xscale_tune =
 {
   arm_xscale_rtx_costs,
   xscale_sched_adjust_cost,
-  2,
-  ARM_PREFETCH_NOT_BENEFICIAL
+  2,						/* Constant limit.  */
+  3,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
 };
 
 const struct tune_params arm_9e_tune =
 {
   arm_9e_rtx_costs,
   NULL,
-  1,
-  ARM_PREFETCH_NOT_BENEFICIAL
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
+};
+
+const struct tune_params arm_v6t2_tune =
+{
+  arm_9e_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,					/* Prefer constant pool.  */
+  arm_default_branch_cost
+};
+
+/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
+const struct tune_params arm_cortex_tune =
+{
+  arm_9e_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,					/* Prefer constant pool.  */
+  arm_default_branch_cost
+};
+
+/* Branches can be dual-issued on Cortex-A5, so conditional execution is
+   less appealing.  Set max_insns_skipped to a low value.  */
+
+const struct tune_params arm_cortex_a5_tune =
+{
+  arm_9e_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  1,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,					/* Prefer constant pool.  */
+  arm_cortex_a5_branch_cost
 };
 
 const struct tune_params arm_cortex_a9_tune =
 {
   arm_9e_rtx_costs,
   cortex_a9_sched_adjust_cost,
-  1,
-  ARM_PREFETCH_BENEFICIAL(4,32,32)
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_BENEFICIAL(4,32,32),
+  false,					/* Prefer constant pool.  */
+  arm_default_branch_cost
 };
 
 const struct tune_params arm_fa726te_tune =
 {
   arm_9e_rtx_costs,
   fa726te_sched_adjust_cost,
-  1,
-  ARM_PREFETCH_NOT_BENEFICIAL
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
 };
 
 
@@ -1702,7 +1806,8 @@
   arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
   arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
   arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
-  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
+  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
+  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
 
   /* If we are not using the default (ARM mode) section anchor offset
@@ -1969,6 +2074,28 @@
 	fix_cm3_ldrd = 0;
     }
 
+  /* Enable -munaligned-access by default for
+     - all ARMv6 architecture-based processors
+     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+     Disable -munaligned-access by default for
+     - all pre-ARMv6 architecture-based processors
+     - ARMv6-M architecture-based processors.  */
+
+  if (unaligned_access == 2)
+    {
+      if (arm_arch6 && (arm_arch_notm || arm_arch7))
+	unaligned_access = 1;
+      else
+	unaligned_access = 0;
+    }
+  else if (unaligned_access == 1
+	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
+    {
+      warning (0, "target CPU does not support unaligned accesses");
+      unaligned_access = 0;
+    }
+
   if (TARGET_THUMB1 && flag_schedule_insns)
     {
       /* Don't warn since it's on by default in -O2.  */
@@ -1982,12 +2109,7 @@
       max_insns_skipped = 6;
     }
   else
-    {
-      /* StrongARM has early execution of branches, so a sequence
-         that is worth skipping is shorter.  */
-      if (arm_tune_strongarm)
-        max_insns_skipped = 3;
-    }
+    max_insns_skipped = current_tune->max_insns_skipped;
 
   /* Hot/Cold partitioning is not currently supported, since we can't
      handle literal pool placement in that case.  */
@@ -2445,7 +2567,7 @@
 }
 
 /* Return true if I is a valid constant for the operation CODE.  */
-static int
+int
 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
 {
   if (const_ok_for_arm (i))
@@ -2453,7 +2575,21 @@
 
   switch (code)
     {
+    case SET:
+      /* See if we can use movw.  */
+      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
+	return 1;
+      else
+	return 0;
+
     case PLUS:
+      /* See if we can use addw or subw.  */
+      if (TARGET_THUMB2
+	  && ((i & 0xfffff000) == 0
+	      || ((-i) & 0xfffff000) == 0))
+	return 1;
+      /* else fall through.  */
+
     case COMPARE:
     case EQ:
     case NE:
@@ -2569,68 +2705,41 @@
 			   1);
 }
 
-/* Return the number of instructions required to synthesize the given
-   constant, if we start emitting them from bit-position I.  */
-static int
-count_insns_for_constant (HOST_WIDE_INT remainder, int i)
-{
-  HOST_WIDE_INT temp1;
-  int step_size = TARGET_ARM ? 2 : 1;
-  int num_insns = 0;
-
-  gcc_assert (TARGET_ARM || i == 0);
-
-  do
-    {
-      int end;
-
-      if (i <= 0)
-	i += 32;
-      if (remainder & (((1 << step_size) - 1) << (i - step_size)))
-	{
-	  end = i - 8;
-	  if (end < 0)
-	    end += 32;
-	  temp1 = remainder & ((0x0ff << end)
-				    | ((i < end) ? (0xff >> (32 - end)) : 0));
-	  remainder &= ~temp1;
-	  num_insns++;
-	  i -= 8 - step_size;
-	}
-      i -= step_size;
-    } while (remainder);
-  return num_insns;
-}
-
+/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
+   ARM/THUMB2 immediates, and add up to VAL.
+   Thr function return value gives the number of insns required.  */
 static int
-find_best_start (unsigned HOST_WIDE_INT remainder)
+optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
+			    struct four_ints *return_sequence)
 {
   int best_consecutive_zeros = 0;
   int i;
   int best_start = 0;
+  int insns1, insns2;
+  struct four_ints tmp_sequence;
 
   /* If we aren't targetting ARM, the best place to start is always at
-     the bottom.  */
-  if (! TARGET_ARM)
-    return 0;
-
-  for (i = 0; i < 32; i += 2)
+     the bottom, otherwise look more closely.  */
+  if (TARGET_ARM)
     {
-      int consecutive_zeros = 0;
-
-      if (!(remainder & (3 << i)))
+      for (i = 0; i < 32; i += 2)
 	{
-	  while ((i < 32) && !(remainder & (3 << i)))
-	    {
-	      consecutive_zeros += 2;
-	      i += 2;
-	    }
-	  if (consecutive_zeros > best_consecutive_zeros)
+	  int consecutive_zeros = 0;
+
+	  if (!(val & (3 << i)))
 	    {
-	      best_consecutive_zeros = consecutive_zeros;
-	      best_start = i - consecutive_zeros;
+	      while ((i < 32) && !(val & (3 << i)))
+		{
+		  consecutive_zeros += 2;
+		  i += 2;
+		}
+	      if (consecutive_zeros > best_consecutive_zeros)
+		{
+		  best_consecutive_zeros = consecutive_zeros;
+		  best_start = i - consecutive_zeros;
+		}
+	      i -= 2;
 	    }
-	  i -= 2;
 	}
     }
 
@@ -2657,13 +2766,161 @@
      the constant starting from `best_start', and also starting from
      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
      yield a shorter sequence, we may as well use zero.  */
+  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
   if (best_start != 0
-      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
-      && (count_insns_for_constant (remainder, 0) <=
-	  count_insns_for_constant (remainder, best_start)))
-    best_start = 0;
+      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
+    {
+      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
+      if (insns2 <= insns1)
+	{
+	  *return_sequence = tmp_sequence;
+	  insns1 = insns2;
+	}
+    }
+
+  return insns1;
+}
+
+/* As for optimal_immediate_sequence, but starting at bit-position I.  */
+static int
+optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
+			     struct four_ints *return_sequence, int i)
+{
+  int remainder = val & 0xffffffff;
+  int insns = 0;
+
+  /* Try and find a way of doing the job in either two or three
+     instructions.
+     
+     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
+     location.  We start at position I.  This may be the MSB, or
+     optimial_immediate_sequence may have positioned it at the largest block 
+     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
+     wrapping around to the top of the word when we drop off the bottom.
+     In the worst case this code should produce no more than four insns.
+
+     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
+     constants, shifted to any arbitrary location.  We should always start
+     at the MSB.  */
+  do
+    {
+      int end;
+      unsigned int b1, b2, b3, b4;
+      unsigned HOST_WIDE_INT result;
+      int loc;
+
+      gcc_assert (insns < 4);
+
+      if (i <= 0)
+	i += 32;
+
+      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
+      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
+	{
+	  loc = i;
+	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
+	    /* We can use addw/subw for the last 12 bits.  */
+	    result = remainder;
+	  else
+	    {
+	      /* Use an 8-bit shifted/rotated immediate.  */
+	      end = i - 8;
+	      if (end < 0)
+		end += 32;
+	      result = remainder & ((0x0ff << end)
+				   | ((i < end) ? (0xff >> (32 - end))
+						: 0));
+	      i -= 8;
+	    }
+	}
+      else
+	{
+	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
+	     arbitrary shifts.  */
+	  i -= TARGET_ARM ? 2 : 1;
+	  continue;
+	}
+
+      /* Next, see if we can do a better job with a thumb2 replicated
+	 constant.
+       
+         We do it this way around to catch the cases like 0x01F001E0 where
+	 two 8-bit immediates would work, but a replicated constant would
+	 make it worse.
+       
+         TODO: 16-bit constants that don't clear all the bits, but still win.
+         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
+      if (TARGET_THUMB2)
+	{
+	  b1 = (remainder & 0xff000000) >> 24;
+	  b2 = (remainder & 0x00ff0000) >> 16;
+	  b3 = (remainder & 0x0000ff00) >> 8;
+	  b4 = remainder & 0xff;
+
+	  if (loc > 24)
+	    {
+	      /* The 8-bit immediate already found clears b1 (and maybe b2),
+		 but must leave b3 and b4 alone.  */
+
+	      /* First try to find a 32-bit replicated constant that clears
+		 almost everything.  We can assume that we can't do it in one,
+		 or else we wouldn't be here.  */
+	      unsigned int tmp = b1 & b2 & b3 & b4;
+	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
+				  + (tmp << 24);
+	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
+					    + (tmp == b3) + (tmp == b4);
+	      if (tmp
+		  && (matching_bytes >= 3
+		      || (matching_bytes == 2
+			  && const_ok_for_op (remainder & ~tmp2, code))))
+		{
+		  /* At least 3 of the bytes match, and the fourth has at 
+		     least as many bits set, or two of the bytes match
+		     and it will only require one more insn to finish.  */
+		  result = tmp2;
+		  i = tmp != b1 ? 32
+		      : tmp != b2 ? 24
+		      : tmp != b3 ? 16
+		      : 8;
+		}
+
+	      /* Second, try to find a 16-bit replicated constant that can
+		 leave three of the bytes clear.  If b2 or b4 is already
+		 zero, then we can.  If the 8-bit from above would not
+		 clear b2 anyway, then we still win.  */
+	      else if (b1 == b3 && (!b2 || !b4
+			       || (remainder & 0x00ff0000 & ~result)))
+		{
+		  result = remainder & 0xff00ff00;
+		  i = 24;
+		}
+	    }
+	  else if (loc > 16)
+	    {
+	      /* The 8-bit immediate already found clears b2 (and maybe b3)
+		 and we don't get here unless b1 is alredy clear, but it will
+		 leave b4 unchanged.  */
+
+	      /* If we can clear b2 and b4 at once, then we win, since the
+		 8-bits couldn't possibly reach that far.  */
+	      if (b2 == b4)
+		{
+		  result = remainder & 0x00ff00ff;
+		  i = 16;
+		}
+	    }
+	}
 
-  return best_start;
+      return_sequence->i[insns++] = result;
+      remainder &= ~result;
+
+      if (code == SET || code == MINUS)
+	code = PLUS;
+    }
+  while (remainder);
+
+  return insns;
 }
 
 /* Emit an instruction with the indicated PATTERN.  If COND is
@@ -2680,7 +2937,6 @@
 
 /* As above, but extra parameter GENERATE which, if clear, suppresses
    RTL generation.  */
-/* ??? This needs more work for thumb2.  */
 
 static int
 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
@@ -2692,15 +2948,15 @@
   int final_invert = 0;
   int can_negate_initial = 0;
   int i;
-  int num_bits_set = 0;
   int set_sign_bit_copies = 0;
   int clear_sign_bit_copies = 0;
   int clear_zero_bit_copies = 0;
   int set_zero_bit_copies = 0;
-  int insns = 0;
+  int insns = 0, neg_insns, inv_insns;
   unsigned HOST_WIDE_INT temp1, temp2;
   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
-  int step_size = TARGET_ARM ? 2 : 1;
+  struct four_ints *immediates;
+  struct four_ints pos_immediates, neg_immediates, inv_immediates;
 
   /* Find out which operations are safe for a given CODE.  Also do a quick
      check for degenerate cases; these can occur when DImode operations
@@ -2709,7 +2965,6 @@
     {
     case SET:
       can_invert = 1;
-      can_negate = 1;
       break;
 
     case PLUS:
@@ -2737,9 +2992,6 @@
 				gen_rtx_SET (VOIDmode, target, source));
 	  return 1;
 	}
-
-      if (TARGET_THUMB2)
-	can_invert = 1;
       break;
 
     case AND:
@@ -2781,6 +3033,7 @@
 					     gen_rtx_NOT (mode, source)));
 	  return 1;
 	}
+      final_invert = 1;
       break;
 
     case MINUS:
@@ -2803,7 +3056,6 @@
 							    source)));
 	  return 1;
 	}
-      can_negate = 1;
 
       break;
 
@@ -2812,9 +3064,7 @@
     }
 
   /* If we can do it in one insn get out quickly.  */
-  if (const_ok_for_arm (val)
-      || (can_negate_initial && const_ok_for_arm (-val))
-      || (can_invert && const_ok_for_arm (~val)))
+  if (const_ok_for_op (val, code))
     {
       if (generate)
 	emit_constant_insn (cond,
@@ -2867,15 +3117,6 @@
   switch (code)
     {
     case SET:
-      /* See if we can use movw.  */
-      if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
-	{
-	  if (generate)
-	    emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
-						   GEN_INT (val)));
-	  return 1;
-	}
-
       /* See if we can do this by sign_extending a constant that is known
 	 to be negative.  This is a good, way of doing it, since the shift
 	 may well merge into a subsequent insn.  */
@@ -3226,121 +3467,97 @@
       break;
     }
 
-  for (i = 0; i < 32; i++)
-    if (remainder & (1 << i))
-      num_bits_set++;
-
-  if ((code == AND)
-      || (code != IOR && can_invert && num_bits_set > 16))
-    remainder ^= 0xffffffff;
-  else if (code == PLUS && num_bits_set > 16)
-    remainder = (-remainder) & 0xffffffff;
-
-  /* For XOR, if more than half the bits are set and there's a sequence
-     of more than 8 consecutive ones in the pattern then we can XOR by the
-     inverted constant and then invert the final result; this may save an
-     instruction and might also lead to the final mvn being merged with
-     some other operation.  */
-  else if (code == XOR && num_bits_set > 16
-	   && (count_insns_for_constant (remainder ^ 0xffffffff,
-					 find_best_start
-					 (remainder ^ 0xffffffff))
-	       < count_insns_for_constant (remainder,
-					   find_best_start (remainder))))
+  /* Calculate what the instruction sequences would be if we generated it
+     normally, negated, or inverted.  */
+  if (code == AND)
+    /* AND cannot be split into multiple insns, so invert and use BIC.  */
+    insns = 99;
+  else
+    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
+
+  if (can_negate)
+    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
+					    &neg_immediates);
+  else
+    neg_insns = 99;
+
+  if (can_invert || final_invert)
+    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
+					    &inv_immediates);
+  else
+    inv_insns = 99;
+
+  immediates = &pos_immediates;
+
+  /* Is the negated immediate sequence more efficient?  */
+  if (neg_insns < insns && neg_insns <= inv_insns)
     {
-      remainder ^= 0xffffffff;
-      final_invert = 1;
+      insns = neg_insns;
+      immediates = &neg_immediates;
+    }
+  else
+    can_negate = 0;
+
+  /* Is the inverted immediate sequence more efficient?
+     We must allow for an extra NOT instruction for XOR operations, although
+     there is some chance that the final 'mvn' will get optimized later.  */
+  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
+    {
+      insns = inv_insns;
+      immediates = &inv_immediates;
     }
   else
     {
       can_invert = 0;
-      can_negate = 0;
+      final_invert = 0;
     }
 
-  /* Now try and find a way of doing the job in either two or three
-     instructions.
-     We start by looking for the largest block of zeros that are aligned on
-     a 2-bit boundary, we then fill up the temps, wrapping around to the
-     top of the word when we drop off the bottom.
-     In the worst case this code should produce no more than four insns.
-     Thumb-2 constants are shifted, not rotated, so the MSB is always the
-     best place to start.  */
-
-  /* ??? Use thumb2 replicated constants when the high and low halfwords are
-     the same.  */
-  {
-    /* Now start emitting the insns.  */
-    i = find_best_start (remainder);
-    do
-      {
-	int end;
+  /* Now output the chosen sequence as instructions.  */
+  if (generate)
+    {
+      for (i = 0; i < insns; i++)
+	{
+	  rtx new_src, temp1_rtx;
 
-	if (i <= 0)
-	  i += 32;
-	if (remainder & (3 << (i - 2)))
-	  {
-	    end = i - 8;
-	    if (end < 0)
-	      end += 32;
-	    temp1 = remainder & ((0x0ff << end)
-				 | ((i < end) ? (0xff >> (32 - end)) : 0));
-	    remainder &= ~temp1;
+	  temp1 = immediates->i[i];
 
-	    if (generate)
-	      {
-		rtx new_src, temp1_rtx;
+	  if (code == SET || code == MINUS)
+	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
+	  else if ((final_invert || i < (insns - 1)) && subtargets)
+	    new_src = gen_reg_rtx (mode);
+	  else
+	    new_src = target;
 
-		if (code == SET || code == MINUS)
-		  {
-		    new_src = (subtargets ? gen_reg_rtx (mode) : target);
-		    if (can_invert && code != MINUS)
-		      temp1 = ~temp1;
-		  }
-		else
-		  {
-		    if ((final_invert || remainder) && subtargets)
-		      new_src = gen_reg_rtx (mode);
-		    else
-		      new_src = target;
-		    if (can_invert)
-		      temp1 = ~temp1;
-		    else if (can_negate)
-		      temp1 = -temp1;
-		  }
+	  if (can_invert)
+	    temp1 = ~temp1;
+	  else if (can_negate)
+	    temp1 = -temp1;
 
-		temp1 = trunc_int_for_mode (temp1, mode);
-		temp1_rtx = GEN_INT (temp1);
+	  temp1 = trunc_int_for_mode (temp1, mode);
+	  temp1_rtx = GEN_INT (temp1);
 
-		if (code == SET)
-		  ;
-		else if (code == MINUS)
-		  temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
-		else
-		  temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
+	  if (code == SET)
+	    ;
+	  else if (code == MINUS)
+	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
+	  else
+	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
 
-		emit_constant_insn (cond,
-				    gen_rtx_SET (VOIDmode, new_src,
-						 temp1_rtx));
-		source = new_src;
-	      }
+	  emit_constant_insn (cond,
+			      gen_rtx_SET (VOIDmode, new_src,
+					   temp1_rtx));
+	  source = new_src;
 
-	    if (code == SET)
-	      {
-		can_invert = 0;
-		code = PLUS;
-	      }
-	    else if (code == MINUS)
+	  if (code == SET)
+	    {
+	      can_negate = can_invert;
+	      can_invert = 0;
 	      code = PLUS;
-
-	    insns++;
-	    i -= 8 - step_size;
-	  }
-	/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
-	   shifts.  */
-	i -= step_size;
-      }
-    while (remainder);
-  }
+	    }
+	  else if (code == MINUS)
+	    code = PLUS;
+	}
+    }
 
   if (final_invert)
     {
@@ -4119,6 +4336,11 @@
 	  (TARGET_VFP_DOUBLE || !is_double));
 }
 
+/* Return true if an argument whose type is TYPE, or mode is MODE, is
+   suitable for passing or returning in VFP registers for the PCS
+   variant selected.  If it is, then *BASE_MODE is updated to contain
+   a machine mode describing each element of the argument's type and
+   *COUNT to hold the number of such elements.  */
 static bool
 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
 				       enum machine_mode mode, const_tree type,
@@ -4126,9 +4348,20 @@
 {
   enum machine_mode new_mode = VOIDmode;
 
-  if (GET_MODE_CLASS (mode) == MODE_FLOAT
-      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
-      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+  /* If we have the type information, prefer that to working things
+     out from the mode.  */
+  if (type)
+    {
+      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
+
+      if (ag_count > 0 && ag_count <= 4)
+	*count = ag_count;
+      else
+	return false;
+    }
+  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
+	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
     {
       *count = 1;
       new_mode = mode;
@@ -4138,15 +4371,6 @@
       *count = 2;
       new_mode = (mode == DCmode ? DFmode : SFmode);
     }
-  else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
-    {
-      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
-
-      if (ag_count > 0 && ag_count <= 4)
-	*count = ag_count;
-      else
-	return false;
-    }
   else
     return false;
 
@@ -5950,7 +6174,7 @@
    addresses based on the frame pointer or arg pointer until the
    reload pass starts.  This is so that eliminating such addresses
    into stack based ones won't produce impossible code.  */
-static int
+int
 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
 {
   /* ??? Not clear if this is right.  Experiment.  */
@@ -6432,31 +6656,159 @@
 			       int opnum, int type,
 			       int ind_levels ATTRIBUTE_UNUSED)
 {
+  /* We must recognize output that we have already generated ourselves.  */
+  if (GET_CODE (*p) == PLUS
+      && GET_CODE (XEXP (*p, 0)) == PLUS
+      && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
+      && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
+      && GET_CODE (XEXP (*p, 1)) == CONST_INT)
+    {
+      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+		   MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
+		   VOIDmode, 0, 0, opnum, (enum reload_type) type);
+      return true;
+    }
+
   if (GET_CODE (*p) == PLUS
       && GET_CODE (XEXP (*p, 0)) == REG
       && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
+      /* If the base register is equivalent to a constant, let the generic
+	 code handle it.  Otherwise we will run into problems if a future
+	 reload pass decides to rematerialize the constant.  */
+      && !reg_equiv_constant [ORIGINAL_REGNO (XEXP (*p, 0))]
       && GET_CODE (XEXP (*p, 1)) == CONST_INT)
     {
       HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
       HOST_WIDE_INT low, high;
 
-      if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
-	low = ((val & 0xf) ^ 0x8) - 0x8;
-      else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
-	/* Need to be careful, -256 is not a valid offset.  */
-	low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
-      else if (mode == SImode
-	       || (mode == SFmode && TARGET_SOFT_FLOAT)
-	       || ((mode == HImode || mode == QImode) && ! arm_arch4))
-	/* Need to be careful, -4096 is not a valid offset.  */
-	low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
-      else if ((mode == HImode || mode == QImode) && arm_arch4)
-	/* Need to be careful, -256 is not a valid offset.  */
-	low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
-      else if (GET_MODE_CLASS (mode) == MODE_FLOAT
-	       && TARGET_HARD_FLOAT && TARGET_FPA)
-	/* Need to be careful, -1024 is not a valid offset.  */
-	low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
+      /* Detect coprocessor load/stores.  */
+      bool coproc_p = ((TARGET_HARD_FLOAT
+			&& (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
+			&& (mode == SFmode || mode == DFmode
+			    || (mode == DImode && TARGET_MAVERICK)))
+		       || (TARGET_REALLY_IWMMXT
+			   && VALID_IWMMXT_REG_MODE (mode))
+		       || (TARGET_NEON
+			   && (VALID_NEON_DREG_MODE (mode)
+			       || VALID_NEON_QREG_MODE (mode))));
+
+      /* For some conditions, bail out when lower two bits are unaligned.  */
+      if ((val & 0x3) != 0
+	  /* Coprocessor load/store indexes are 8-bits + '00' appended.  */
+	  && (coproc_p
+	      /* For DI, and DF under soft-float: */
+	      || ((mode == DImode || mode == DFmode)
+		  /* Without ldrd, we use stm/ldm, which does not
+		     fair well with unaligned bits.  */
+		  && (! TARGET_LDRD
+		      /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4.  */
+		      || TARGET_THUMB2))))
+	return false;
+
+      /* When breaking down a [reg+index] reload address into [(reg+high)+low],
+	 of which the (reg+high) gets turned into a reload add insn,
+	 we try to decompose the index into high/low values that can often
+	 also lead to better reload CSE.
+	 For example:
+	         ldr r0, [r2, #4100]  // Offset too large
+		 ldr r1, [r2, #4104]  // Offset too large
+
+	 is best reloaded as:
+	         add t1, r2, #4096
+		 ldr r0, [t1, #4]
+		 add t2, r2, #4096
+		 ldr r1, [t2, #8]
+
+	 which post-reload CSE can simplify in most cases to eliminate the
+	 second add instruction:
+	         add t1, r2, #4096
+		 ldr r0, [t1, #4]
+		 ldr r1, [t1, #8]
+
+	 The idea here is that we want to split out the bits of the constant
+	 as a mask, rather than as subtracting the maximum offset that the
+	 respective type of load/store used can handle.
+
+	 When encountering negative offsets, we can still utilize it even if
+	 the overall offset is positive; sometimes this may lead to an immediate
+	 that can be constructed with fewer instructions.
+	 For example:
+	         ldr r0, [r2, #0x3FFFFC]
+
+	 This is best reloaded as:
+	         add t1, r2, #0x400000
+		 ldr r0, [t1, #-4]
+
+	 The trick for spotting this for a load insn with N bits of offset
+	 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
+	 negative offset that is going to make bit N and all the bits below
+	 it become zero in the remainder part.
+
+	 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
+	 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
+	 used in most cases of ARM load/store instructions.  */
+
+#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)					\
+      (((VAL) & ((1 << (N)) - 1))					\
+       ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))	\
+       : 0)
+
+      if (coproc_p)
+	{
+	  low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
+
+	  /* NEON quad-word load/stores are made of two double-word accesses,
+	     so the valid index range is reduced by 8. Treat as 9-bit range if
+	     we go over it.  */
+	  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
+	    low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
+	}
+      else if (GET_MODE_SIZE (mode) == 8)
+	{
+	  if (TARGET_LDRD)
+	    low = (TARGET_THUMB2
+		   ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
+		   : SIGN_MAG_LOW_ADDR_BITS (val, 8));
+	  else
+	    /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
+	       to access doublewords. The supported load/store offsets are
+	       -8, -4, and 4, which we try to produce here.  */
+	    low = ((val & 0xf) ^ 0x8) - 0x8;
+	}
+      else if (GET_MODE_SIZE (mode) < 8)
+	{
+	  /* NEON element load/stores do not have an offset.  */
+	  if (TARGET_NEON_FP16 && mode == HFmode)
+	    return false;
+
+	  if (TARGET_THUMB2)
+	    {
+	      /* Thumb-2 has an asymmetrical index range of (-256,4096).
+		 Try the wider 12-bit range first, and re-try if the result
+		 is out of range.  */
+	      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
+	      if (low < -255)
+		low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
+	    }
+	  else
+	    {
+	      if (mode == HImode || mode == HFmode)
+		{
+		  if (arm_arch4)
+		    low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
+		  else
+		    {
+		      /* The storehi/movhi_bytes fallbacks can use only
+			 [-4094,+4094] of the full ldrb/strb index range.  */
+		      low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
+		      if (low == 4095 || low == -4095)
+			return false;
+		    }
+		}
+	      else
+		low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
+	    }
+	}
       else
 	return false;
 
@@ -6569,9 +6921,47 @@
   return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
 }
 
+/* Implement TARGET_LEGITIMATE_CONSTANT_P.
+
+   On the ARM, allow any integer (invalid ones are removed later by insn
+   patterns), nice doubles and symbol_refs which refer to the function's
+   constant pool XXX.
+
+   When generating pic allow anything.  */
+
+static bool
+arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
+{
+  /* At present, we have no support for Neon structure constants, so forbid
+     them here.  It might be possible to handle simple cases like 0 and -1
+     in future.  */
+  if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
+    return false;
+
+  return flag_pic || !label_mentioned_p (x);
+}
+
+static bool
+thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+{
+  return (GET_CODE (x) == CONST_INT
+	  || GET_CODE (x) == CONST_DOUBLE
+	  || CONSTANT_ADDRESS_P (x)
+	  || flag_pic);
+}
+
+static bool
+arm_legitimate_constant_p (enum machine_mode mode, rtx x)
+{
+  return (!arm_cannot_force_const_mem (x)
+	  && (TARGET_32BIT
+	      ? arm_legitimate_constant_p_1 (mode, x)
+	      : thumb_legitimate_constant_p (mode, x)));
+}
+
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
-bool
+static bool
 arm_cannot_force_const_mem (rtx x)
 {
   rtx base, offset;
@@ -7267,6 +7657,9 @@
 	*total = COSTS_N_INSNS (4);
       return true;
 
+    case SET:
+      return false;
+
     case UNSPEC:
       /* We cost this as high as our memory costs to allow this to
 	 be hoisted from loops.  */
@@ -7623,6 +8016,9 @@
       *total = COSTS_N_INSNS (1) + 1;
       return true;
 
+    case SET:
+      return false;
+
     default:
       if (mode != VOIDmode)
 	*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
@@ -8203,6 +8599,21 @@
   return cost;
 }
 
+static int
+arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
+{
+  if (TARGET_32BIT)
+    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
+  else
+    return (optimize > 0) ? 2 : 0;
+}
+
+static int
+arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
+{
+  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
+}
+
 static int fp_consts_inited = 0;
 
 /* Only zero is valid for VFP.  Other values are also valid for FPA.  */
@@ -8660,7 +9071,67 @@
   return 1;
 }
 
-/* Return a string suitable for output of Neon immediate logic operation
+/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
+   the immediate is valid, write a constant suitable for using as an operand
+   to VSHR/VSHL to *MODCONST and the corresponding element width to
+   *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
+   because they have different limitations.  */
+
+int
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+				rtx *modconst, int *elementwidth,
+				bool isleftshift)
+{
+  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
+  unsigned HOST_WIDE_INT last_elt = 0;
+  unsigned HOST_WIDE_INT maxshift;
+
+  /* Split vector constant out into a byte vector.  */
+  for (i = 0; i < n_elts; i++)
+    {
+      rtx el = CONST_VECTOR_ELT (op, i);
+      unsigned HOST_WIDE_INT elpart;
+
+      if (GET_CODE (el) == CONST_INT)
+        elpart = INTVAL (el);
+      else if (GET_CODE (el) == CONST_DOUBLE)
+        return 0;
+      else
+        gcc_unreachable ();
+
+      if (i != 0 && elpart != last_elt)
+        return 0;
+
+      last_elt = elpart;
+    }
+
+  /* Shift less than element size.  */
+  maxshift = innersize * 8;
+
+  if (isleftshift)
+    {
+      /* Left shift immediate value can be from 0 to <size>-1.  */
+      if (last_elt >= maxshift)
+        return 0;
+    }
+  else
+    {
+      /* Right shift immediate value can be from 1 to <size>.  */
+      if (last_elt == 0 || last_elt > maxshift)
+	return 0;
+    }
+
+  if (elementwidth)
+    *elementwidth = innersize * 8;
+
+  if (modconst)
+    *modconst = CONST_VECTOR_ELT (op, 0);
+
+  return 1;
+}
+
+/* Return a string suitable for output of Neon immediate logic operation
    MNEM.  */
 
 char *
@@ -8682,6 +9153,28 @@
   return templ;
 }
 
+/* Return a string suitable for output of Neon immediate shift operation
+   (VSHR or VSHL) MNEM.  */
+
+char *
+neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
+			     enum machine_mode mode, int quad,
+			     bool isleftshift)
+{
+  int width, is_valid;
+  static char templ[40];
+
+  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
+  gcc_assert (is_valid != 0);
+
+  if (quad)
+    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
+  else
+    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
+
+  return templ;
+}
+
 /* Output a sequence of pairwise operations to implement a reduction.
    NOTE: We do "too much work" here, because pairwise operations work on two
    registers-worth of operands in one go. Unfortunately we can't exploit those
@@ -9154,6 +9647,11 @@
   if (GET_CODE (ind) == REG)
     return arm_address_register_rtx_p (ind, 0);
 
+  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
+  if (GET_CODE (ind) == POST_INC
+      || GET_CODE (ind) == PRE_DEC)
+    return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
   return FALSE;
 }
 
@@ -9182,11 +9680,14 @@
       return GENERAL_REGS;
     }
 
+  /* The neon move patterns handle all legitimate vector and struct
+     addresses.  */
   if (TARGET_NEON
+      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
-          || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
-      && neon_vector_mem_operand (x, 0))
-     return NO_REGS;
+	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+	  || VALID_NEON_STRUCT_MODE (mode)))
+    return NO_REGS;
 
   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
     return NO_REGS;
@@ -9501,6 +10002,42 @@
     }
 }
 
+/* Match pair of min/max operators that can be implemented via usat/ssat.  */
+
+bool
+arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
+			int *mask, bool *signed_sat)
+{
+  /* The high bound must be a power of two minus one.  */
+  int log = exact_log2 (INTVAL (hi_bound) + 1);
+  if (log == -1)
+    return false;
+
+  /* The low bound is either zero (for usat) or one less than the
+     negation of the high bound (for ssat).  */
+  if (INTVAL (lo_bound) == 0)
+    {
+      if (mask)
+        *mask = log;
+      if (signed_sat)
+        *signed_sat = false;
+
+      return true;
+    }
+
+  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
+    {
+      if (mask)
+        *mask = log + 1;
+      if (signed_sat)
+        *signed_sat = true;
+
+      return true;
+    }
+
+  return false;
+}
+
 /* Return 1 if memory locations are adjacent.  */
 int
 adjacent_mem_locations (rtx a, rtx b)
@@ -10380,6 +10917,335 @@
   return true;
 }
 
+/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
+   unaligned copies on processors which support unaligned semantics for those
+   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
+   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
+   An interleave factor of 1 (the minimum) will perform no interleaving. 
+   Load/store multiple are used for aligned addresses where possible.  */
+
+static void
+arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
+				   HOST_WIDE_INT length,
+				   unsigned int interleave_factor)
+{
+  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
+  int *regnos = XALLOCAVEC (int, interleave_factor);
+  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
+  HOST_WIDE_INT i, j;
+  HOST_WIDE_INT remaining = length, words;
+  rtx halfword_tmp = NULL, byte_tmp = NULL;
+  rtx dst, src;
+  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
+  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
+  HOST_WIDE_INT srcoffset, dstoffset;
+  HOST_WIDE_INT src_autoinc, dst_autoinc;
+  rtx mem, addr;
+  
+  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
+  
+  /* Use hard registers if we have aligned source or destination so we can use
+     load/store multiple with contiguous registers.  */
+  if (dst_aligned || src_aligned)
+    for (i = 0; i < interleave_factor; i++)
+      regs[i] = gen_rtx_REG (SImode, i);
+  else
+    for (i = 0; i < interleave_factor; i++)
+      regs[i] = gen_reg_rtx (SImode);
+
+  dst = copy_addr_to_reg (XEXP (dstbase, 0));
+  src = copy_addr_to_reg (XEXP (srcbase, 0));
+
+  srcoffset = dstoffset = 0;
+  
+  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
+     For copying the last bytes we want to subtract this offset again.  */
+  src_autoinc = dst_autoinc = 0;
+
+  for (i = 0; i < interleave_factor; i++)
+    regnos[i] = i;
+
+  /* Copy BLOCK_SIZE_BYTES chunks.  */
+
+  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
+    {
+      /* Load words.  */
+      if (src_aligned && interleave_factor > 1)
+	{
+	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
+					    TRUE, srcbase, &srcoffset));
+	  src_autoinc += UNITS_PER_WORD * interleave_factor;
+	}
+      else
+	{
+	  for (j = 0; j < interleave_factor; j++)
+	    {
+	      addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
+					 - src_autoinc);
+	      mem = adjust_automodify_address (srcbase, SImode, addr,
+					       srcoffset + j * UNITS_PER_WORD);
+	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
+	    }
+	  srcoffset += block_size_bytes;
+	}
+
+      /* Store words.  */
+      if (dst_aligned && interleave_factor > 1)
+	{
+	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
+					     TRUE, dstbase, &dstoffset));
+	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
+	}
+      else
+	{
+	  for (j = 0; j < interleave_factor; j++)
+	    {
+	      addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
+					 - dst_autoinc);
+	      mem = adjust_automodify_address (dstbase, SImode, addr,
+					       dstoffset + j * UNITS_PER_WORD);
+	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
+	    }
+	  dstoffset += block_size_bytes;
+	}
+
+      remaining -= block_size_bytes;
+    }
+  
+  /* Copy any whole words left (note these aren't interleaved with any
+     subsequent halfword/byte load/stores in the interests of simplicity).  */
+  
+  words = remaining / UNITS_PER_WORD;
+
+  gcc_assert (words < interleave_factor);
+  
+  if (src_aligned && words > 1)
+    {
+      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
+					&srcoffset));
+      src_autoinc += UNITS_PER_WORD * words;
+    }
+  else
+    {
+      for (j = 0; j < words; j++)
+	{
+	  addr = plus_constant (src,
+				srcoffset + j * UNITS_PER_WORD - src_autoinc);
+	  mem = adjust_automodify_address (srcbase, SImode, addr,
+					   srcoffset + j * UNITS_PER_WORD);
+	  emit_insn (gen_unaligned_loadsi (regs[j], mem));
+	}
+      srcoffset += words * UNITS_PER_WORD;
+    }
+
+  if (dst_aligned && words > 1)
+    {
+      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
+					 &dstoffset));
+      dst_autoinc += words * UNITS_PER_WORD;
+    }
+  else
+    {
+      for (j = 0; j < words; j++)
+	{
+	  addr = plus_constant (dst,
+				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
+	  mem = adjust_automodify_address (dstbase, SImode, addr,
+					   dstoffset + j * UNITS_PER_WORD);
+	  emit_insn (gen_unaligned_storesi (mem, regs[j]));
+	}
+      dstoffset += words * UNITS_PER_WORD;
+    }
+
+  remaining -= words * UNITS_PER_WORD;
+  
+  gcc_assert (remaining < 4);
+  
+  /* Copy a halfword if necessary.  */
+  
+  if (remaining >= 2)
+    {
+      halfword_tmp = gen_reg_rtx (SImode);
+
+      addr = plus_constant (src, srcoffset - src_autoinc);
+      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
+      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
+
+      /* Either write out immediately, or delay until we've loaded the last
+	 byte, depending on interleave factor.  */
+      if (interleave_factor == 1)
+	{
+	  addr = plus_constant (dst, dstoffset - dst_autoinc);
+	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
+	  emit_insn (gen_unaligned_storehi (mem,
+		       gen_lowpart (HImode, halfword_tmp)));
+	  halfword_tmp = NULL;
+	  dstoffset += 2;
+	}
+
+      remaining -= 2;
+      srcoffset += 2;
+    }
+  
+  gcc_assert (remaining < 2);
+  
+  /* Copy last byte.  */
+  
+  if ((remaining & 1) != 0)
+    {
+      byte_tmp = gen_reg_rtx (SImode);
+
+      addr = plus_constant (src, srcoffset - src_autoinc);
+      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
+      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
+
+      if (interleave_factor == 1)
+	{
+	  addr = plus_constant (dst, dstoffset - dst_autoinc);
+	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
+	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
+	  byte_tmp = NULL;
+	  dstoffset++;
+	}
+
+      remaining--;
+      srcoffset++;
+    }
+  
+  /* Store last halfword if we haven't done so already.  */
+  
+  if (halfword_tmp)
+    {
+      addr = plus_constant (dst, dstoffset - dst_autoinc);
+      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
+      emit_insn (gen_unaligned_storehi (mem,
+		   gen_lowpart (HImode, halfword_tmp)));
+      dstoffset += 2;
+    }
+
+  /* Likewise for last byte.  */
+
+  if (byte_tmp)
+    {
+      addr = plus_constant (dst, dstoffset - dst_autoinc);
+      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
+      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
+      dstoffset++;
+    }
+  
+  gcc_assert (remaining == 0 && srcoffset == dstoffset);
+}
+
+/* From mips_adjust_block_mem:
+
+   Helper function for doing a loop-based block operation on memory
+   reference MEM.  Each iteration of the loop will operate on LENGTH
+   bytes of MEM.
+
+   Create a new base register for use within the loop and point it to
+   the start of MEM.  Create a new memory reference that uses this
+   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+
+static void
+arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
+		      rtx *loop_mem)
+{
+  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+  
+  /* Although the new mem does not refer to a known location,
+     it does keep up to LENGTH bytes of alignment.  */
+  *loop_mem = change_address (mem, BLKmode, *loop_reg);
+  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* From mips_block_move_loop:
+
+   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
+   the memory regions do not overlap.  */
+
+static void
+arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+			       unsigned int interleave_factor,
+			       HOST_WIDE_INT bytes_per_iter)
+{
+  rtx label, src_reg, dest_reg, final_src, test;
+  HOST_WIDE_INT leftover;
+  
+  leftover = length % bytes_per_iter;
+  length -= leftover;
+  
+  /* Create registers and memory references for use within the loop.  */
+  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+  
+  /* Calculate the value that SRC_REG should have after the last iteration of
+     the loop.  */
+  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+				   0, 0, OPTAB_WIDEN);
+
+  /* Emit the start of the loop.  */
+  label = gen_label_rtx ();
+  emit_label (label);
+  
+  /* Emit the loop body.  */
+  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
+				     interleave_factor);
+
+  /* Move on to the next block.  */
+  emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
+  emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
+  
+  /* Emit the loop condition.  */
+  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+  
+  /* Mop up any left-over bytes.  */
+  if (leftover)
+    arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
+}
+
+/* Emit a block move when either the source or destination is unaligned (not
+   aligned to a four-byte boundary).  This may need further tuning depending on
+   core type, optimize_size setting, etc.  */
+
+static int
+arm_movmemqi_unaligned (rtx *operands)
+{
+  HOST_WIDE_INT length = INTVAL (operands[2]);
+  
+  if (optimize_size)
+    {
+      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
+      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
+      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
+	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
+	 or dst_aligned though: allow more interleaving in those cases since the
+	 resulting code can be smaller.  */
+      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
+      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
+      
+      if (length > 12)
+	arm_block_move_unaligned_loop (operands[0], operands[1], length,
+				       interleave_factor, bytes_per_iter);
+      else
+	arm_block_move_unaligned_straight (operands[0], operands[1], length,
+					   interleave_factor);
+    }
+  else
+    {
+      /* Note that the loop created by arm_block_move_unaligned_loop may be
+	 subject to loop unrolling, which makes tuning this condition a little
+	 redundant.  */
+      if (length > 32)
+	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
+      else
+	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
+    }
+  
+  return 1;
+}
+
 int
 arm_gen_movmemqi (rtx *operands)
 {
@@ -10392,8 +11258,13 @@
 
   if (GET_CODE (operands[2]) != CONST_INT
       || GET_CODE (operands[3]) != CONST_INT
-      || INTVAL (operands[2]) > 64
-      || INTVAL (operands[3]) & 3)
+      || INTVAL (operands[2]) > 64)
+    return 0;
+
+  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
+    return arm_movmemqi_unaligned (operands);
+
+  if (INTVAL (operands[3]) & 3)
     return 0;
 
   dstbase = operands[0];
@@ -10821,7 +11692,7 @@
 	    return CC_Zmode;
 
 	  /* We can do an equality test in three Thumb instructions.  */
-	  if (!TARGET_ARM)
+	  if (!TARGET_32BIT)
 	    return CC_Zmode;
 
 	  /* FALLTHROUGH */
@@ -10833,7 +11704,7 @@
 	  /* DImode unsigned comparisons can be implemented by cmp +
 	     cmpeq without a scratch register.  Not worth doing in
 	     Thumb-2.  */
-	  if (TARGET_ARM)
+	  if (TARGET_32BIT)
 	    return CC_CZmode;
 
 	  /* FALLTHROUGH */
@@ -11482,6 +12353,19 @@
   return 0;
 }
 
+/* Return the maximum amount of padding that will be inserted before
+   label LABEL.  */
+
+static HOST_WIDE_INT
+get_label_padding (rtx label)
+{
+  HOST_WIDE_INT align, min_insn_size;
+
+  align = 1 << label_to_alignment (label);
+  min_insn_size = TARGET_THUMB ? 2 : 4;
+  return align > min_insn_size ? align - min_insn_size : 0;
+}
+
 /* Move a minipool fix MP from its current location to before MAX_MP.
    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
    constraints may need updating.  */
@@ -12028,8 +12912,12 @@
 	 within range.  */
       gcc_assert (GET_CODE (from) != BARRIER);
 
-      /* Count the length of this insn.  */
-      count += get_attr_length (from);
+      /* Count the length of this insn.  This must stay in sync with the
+	 code that pushes minipool fixes.  */
+      if (LABEL_P (from))
+	count += get_label_padding (from);
+      else
+	count += get_attr_length (from);
 
       /* If there is a jump table, add its length.  */
       tmp = is_jump_table (from);
@@ -12449,6 +13337,11 @@
 	      insn = table;
 	    }
 	}
+      else if (LABEL_P (insn))
+	/* Add the worst-case padding due to alignment.  We don't add
+	   the _current_ padding because the minipool insertions
+	   themselves might change it.  */
+	address += get_label_padding (insn);
     }
 
   fix = minipool_fix_head;
@@ -16640,7 +17533,8 @@
       {
 	rtx addr;
 	bool postinc = FALSE;
-	unsigned align, modesize, align_bits;
+	unsigned align, memsize, align_bits;
+	rtx memsize_rtx;
 
 	gcc_assert (GET_CODE (x) == MEM);
 	addr = XEXP (x, 0);
@@ -16655,14 +17549,15 @@
 	   instruction (for some alignments) as an aid to the memory subsystem
 	   of the target.  */
 	align = MEM_ALIGN (x) >> 3;
-	modesize = GET_MODE_SIZE (GET_MODE (x));
+	memsize_rtx = MEM_SIZE (x);
+	memsize = memsize_rtx ? INTVAL (memsize_rtx) : 0;
 	
 	/* Only certain alignment specifiers are supported by the hardware.  */
-	if (modesize == 16 && (align % 32) == 0)
+	if (memsize == 16 && (align % 32) == 0)
 	  align_bits = 256;
-	else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
+	else if (memsize == 16 && (align % 16) == 0)
 	  align_bits = 128;
-	else if ((align % 8) == 0)
+	else if (memsize >= 8 && (align % 8) == 0)
 	  align_bits = 64;
 	else
 	  align_bits = 0;
@@ -16712,6 +17607,11 @@
       }
       return;
 
+    case 'v':
+	gcc_assert (GET_CODE (x) == CONST_DOUBLE);
+	fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
+	return;
+
     /* Register specifier for vld1.16/vst1.16.  Translate the S register
        number into a D register number and element index.  */
     case 'z':
@@ -17071,10 +17971,10 @@
    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
 
 /* Returns the index of the ARM condition code string in
-   `arm_condition_codes'.  COMPARISON should be an rtx like
-   `(eq (...) (...))'.  */
-static enum arm_cond_code
-get_arm_condition_code (rtx comparison)
+   `arm_condition_codes', or ARM_NV if the comparison is invalid.
+   COMPARISON should be an rtx like `(eq (...) (...))'.  */
+enum arm_cond_code
+maybe_get_arm_condition_code (rtx comparison)
 {
   enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
   enum arm_cond_code code;
@@ -17098,11 +17998,11 @@
     case CC_DLTUmode: code = ARM_CC;
 
     dominance:
-      gcc_assert (comp_code == EQ || comp_code == NE);
-
       if (comp_code == EQ)
 	return ARM_INVERSE_CONDITION_CODE (code);
-      return code;
+      if (comp_code == NE)
+	return code;
+      return ARM_NV;
 
     case CC_NOOVmode:
       switch (comp_code)
@@ -17111,7 +18011,7 @@
 	case EQ: return ARM_EQ;
 	case GE: return ARM_PL;
 	case LT: return ARM_MI;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CC_Zmode:
@@ -17119,7 +18019,7 @@
 	{
 	case NE: return ARM_NE;
 	case EQ: return ARM_EQ;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CC_Nmode:
@@ -17127,7 +18027,7 @@
 	{
 	case NE: return ARM_MI;
 	case EQ: return ARM_PL;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CCFPEmode:
@@ -17152,7 +18052,7 @@
 	  /* UNEQ and LTGT do not have a representation.  */
 	case UNEQ: /* Fall through.  */
 	case LTGT: /* Fall through.  */
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CC_SWPmode:
@@ -17168,7 +18068,7 @@
 	case GTU: return ARM_CC;
 	case LEU: return ARM_CS;
 	case LTU: return ARM_HI;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CC_Cmode:
@@ -17176,7 +18076,7 @@
 	{
 	case LTU: return ARM_CS;
 	case GEU: return ARM_CC;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CC_CZmode:
@@ -17188,7 +18088,7 @@
 	case GTU: return ARM_HI;
 	case LEU: return ARM_LS;
 	case LTU: return ARM_CC;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CC_NCVmode:
@@ -17198,7 +18098,7 @@
 	case LT: return ARM_LT;
 	case GEU: return ARM_CS;
 	case LTU: return ARM_CC;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     case CCmode:
@@ -17214,13 +18114,22 @@
 	case GTU: return ARM_HI;
 	case LEU: return ARM_LS;
 	case LTU: return ARM_CC;
-	default: gcc_unreachable ();
+	default: return ARM_NV;
 	}
 
     default: gcc_unreachable ();
     }
 }
 
+/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
+{
+  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
+  gcc_assert (code != ARM_NV);
+  return code;
+}
+
 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
    instructions.  */
 void
@@ -17832,920 +18741,612 @@
   return value;
 }
 
-#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
-  do									\
-    {									\
-      if ((MASK) & insn_flags)						\
-        add_builtin_function ((NAME), (TYPE), (CODE),			\
-			     BUILT_IN_MD, NULL, NULL_TREE);		\
-    }									\
-  while (0)
+typedef enum {
+  T_V8QI,
+  T_V4HI,
+  T_V2SI,
+  T_V2SF,
+  T_DI,
+  T_V16QI,
+  T_V8HI,
+  T_V4SI,
+  T_V4SF,
+  T_V2DI,
+  T_TI,
+  T_EI,
+  T_OI,
+  T_MAX		/* Size of enum.  Keep last.  */
+} neon_builtin_type_mode;
+
+#define TYPE_MODE_BIT(X) (1 << (X))
+
+#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)	\
+		 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF)	\
+		 | TYPE_MODE_BIT (T_DI))
+#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI)	\
+		 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF)	\
+		 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
 
-struct builtin_description
-{
-  const unsigned int       mask;
-  const enum insn_code     icode;
-  const char * const       name;
-  const enum arm_builtins  code;
-  const enum rtx_code      comparison;
-  const unsigned int       flag;
-};
+#define v8qi_UP  T_V8QI
+#define v4hi_UP  T_V4HI
+#define v2si_UP  T_V2SI
+#define v2sf_UP  T_V2SF
+#define di_UP    T_DI
+#define v16qi_UP T_V16QI
+#define v8hi_UP  T_V8HI
+#define v4si_UP  T_V4SI
+#define v4sf_UP  T_V4SF
+#define v2di_UP  T_V2DI
+#define ti_UP	 T_TI
+#define ei_UP	 T_EI
+#define oi_UP	 T_OI
 
-static const struct builtin_description bdesc_2arg[] =
-{
-#define IWMMXT_BUILTIN(code, string, builtin) \
-  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
-    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+#define UP(X) X##_UP
 
-  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
-  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
-  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
-  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
-  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
-  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
-  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
-  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
-  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
-  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
-  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
-  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
-  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
-  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
-  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
-  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
-  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
-  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
-  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
-  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
-  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
-  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
-  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
-  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
-  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
-  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
-  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
-  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
-  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
-  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
-  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
-  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
-  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
-  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
-  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
-  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
-  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
-  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
-  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
-  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
-  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
-  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
-  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
-  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
-  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
-  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
-  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
-  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
-  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
-  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
-  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
-  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
-  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
-  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
-  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
-  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
-  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
-  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
+typedef enum {
+  NEON_BINOP,
+  NEON_TERNOP,
+  NEON_UNOP,
+  NEON_GETLANE,
+  NEON_SETLANE,
+  NEON_CREATE,
+  NEON_DUP,
+  NEON_DUPLANE,
+  NEON_COMBINE,
+  NEON_SPLIT,
+  NEON_LANEMUL,
+  NEON_LANEMULL,
+  NEON_LANEMULH,
+  NEON_LANEMAC,
+  NEON_SCALARMUL,
+  NEON_SCALARMULL,
+  NEON_SCALARMULH,
+  NEON_SCALARMAC,
+  NEON_CONVERT,
+  NEON_FIXCONV,
+  NEON_SELECT,
+  NEON_RESULTPAIR,
+  NEON_REINTERP,
+  NEON_VTBL,
+  NEON_VTBX,
+  NEON_LOAD1,
+  NEON_LOAD1LANE,
+  NEON_STORE1,
+  NEON_STORE1LANE,
+  NEON_LOADSTRUCT,
+  NEON_LOADSTRUCTLANE,
+  NEON_STORESTRUCT,
+  NEON_STORESTRUCTLANE,
+  NEON_LOGICBINOP,
+  NEON_SHIFTINSERT,
+  NEON_SHIFTIMM,
+  NEON_SHIFTACC
+} neon_itype;
 
-#define IWMMXT_BUILTIN2(code, builtin) \
-  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+typedef struct {
+  const char *name;
+  const neon_itype itype;
+  const neon_builtin_type_mode mode;
+  const enum insn_code code;
+  unsigned int fcode;
+} neon_builtin_datum;
 
-  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
-  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
-  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
-  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
-  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
-  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
-  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
-  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
-  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
-  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
-  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
-  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
-  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
-  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
-  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
-  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
-  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
-  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
-  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
-  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
-  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
-  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
-  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
-  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
-  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
-  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
-  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
-  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
-  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
-  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
-  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
-  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
-};
+#define CF(N,X) CODE_FOR_neon_##N##X
 
-static const struct builtin_description bdesc_1arg[] =
+#define VAR1(T, N, A) \
+  {#N, NEON_##T, UP (A), CF (N, A), 0}
+#define VAR2(T, N, A, B) \
+  VAR1 (T, N, A), \
+  {#N, NEON_##T, UP (B), CF (N, B), 0}
+#define VAR3(T, N, A, B, C) \
+  VAR2 (T, N, A, B), \
+  {#N, NEON_##T, UP (C), CF (N, C), 0}
+#define VAR4(T, N, A, B, C, D) \
+  VAR3 (T, N, A, B, C), \
+  {#N, NEON_##T, UP (D), CF (N, D), 0}
+#define VAR5(T, N, A, B, C, D, E) \
+  VAR4 (T, N, A, B, C, D), \
+  {#N, NEON_##T, UP (E), CF (N, E), 0}
+#define VAR6(T, N, A, B, C, D, E, F) \
+  VAR5 (T, N, A, B, C, D, E), \
+  {#N, NEON_##T, UP (F), CF (N, F), 0}
+#define VAR7(T, N, A, B, C, D, E, F, G) \
+  VAR6 (T, N, A, B, C, D, E, F), \
+  {#N, NEON_##T, UP (G), CF (N, G), 0}
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
+  VAR7 (T, N, A, B, C, D, E, F, G), \
+  {#N, NEON_##T, UP (H), CF (N, H), 0}
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
+  VAR8 (T, N, A, B, C, D, E, F, G, H), \
+  {#N, NEON_##T, UP (I), CF (N, I), 0}
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
+  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
+  {#N, NEON_##T, UP (J), CF (N, J), 0}
+
+/* The mode entries in the following table correspond to the "key" type of the
+   instruction variant, i.e. equivalent to that which would be specified after
+   the assembler mnemonic, which usually refers to the last vector operand.
+   (Signed/unsigned/polynomial types are not differentiated between though, and
+   are all mapped onto the same mode for a given element size.) The modes
+   listed per instruction should be the same as those defined for that
+   instruction's pattern in neon.md.  */
+
+static neon_builtin_datum neon_builtin_data[] =
 {
-  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
-  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
-  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
-  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
-  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
-  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
-  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
-  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
-  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
-  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
-  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
-  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
-  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
-  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
-  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
-  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
-  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
-  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
+  VAR10 (BINOP, vadd,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
+  VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
+  VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
+  VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
+  VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
+  VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
+  VAR2 (TERNOP, vqdmlal, v4hi, v2si),
+  VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
+  VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
+  VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
+  VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
+  VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
+  VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
+  VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
+  VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
+  VAR2 (BINOP, vqdmull, v4hi, v2si),
+  VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
+  VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR10 (BINOP, vsub,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
+  VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
+  VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
+  VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR2 (BINOP, vcage, v2sf, v4sf),
+  VAR2 (BINOP, vcagt, v2sf, v4sf),
+  VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
+  VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
+  VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
+  VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
+  VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
+  VAR2 (BINOP, vrecps, v2sf, v4sf),
+  VAR2 (BINOP, vrsqrts, v2sf, v4sf),
+  VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+  VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  VAR2 (UNOP, vcnt, v8qi, v16qi),
+  VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
+  VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
+  VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+  /* FIXME: vget_lane supports more variants than this!  */
+  VAR10 (GETLANE, vget_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (SETLANE, vset_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
+  VAR10 (DUP, vdup_n,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (DUPLANE, vdup_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
+  VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
+  VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
+  VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
+  VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
+  VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
+  VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
+  VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
+  VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
+  VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
+  VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
+  VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
+  VAR10 (BINOP, vext,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
+  VAR2 (UNOP, vrev16, v8qi, v16qi),
+  VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
+  VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
+  VAR10 (SELECT, vbsl,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR1 (VTBL, vtbl1, v8qi),
+  VAR1 (VTBL, vtbl2, v8qi),
+  VAR1 (VTBL, vtbl3, v8qi),
+  VAR1 (VTBL, vtbl4, v8qi),
+  VAR1 (VTBX, vtbx1, v8qi),
+  VAR1 (VTBX, vtbx2, v8qi),
+  VAR1 (VTBX, vtbx3, v8qi),
+  VAR1 (VTBX, vtbx4, v8qi),
+  VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
+  VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOAD1, vld1,
+         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOAD1LANE, vld1_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOAD1, vld1_dup,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (STORE1, vst1,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (STORE1LANE, vst1_lane,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR9 (LOADSTRUCT,
+	vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (LOADSTRUCTLANE, vld2_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
+  VAR9 (STORESTRUCT, vst2,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (STORESTRUCTLANE, vst2_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR9 (LOADSTRUCT,
+	vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (LOADSTRUCTLANE, vld3_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
+  VAR9 (STORESTRUCT, vst3,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (STORESTRUCTLANE, vst3_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR9 (LOADSTRUCT, vld4,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (LOADSTRUCTLANE, vld4_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
+  VAR9 (STORESTRUCT, vst4,
+	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+  VAR7 (STORESTRUCTLANE, vst4_lane,
+	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+  VAR10 (LOGICBINOP, vand,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOGICBINOP, vorr,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (BINOP, veor,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOGICBINOP, vbic,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+  VAR10 (LOGICBINOP, vorn,
+	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+};
+
+#undef CF
+#undef VAR1
+#undef VAR2
+#undef VAR3
+#undef VAR4
+#undef VAR5
+#undef VAR6
+#undef VAR7
+#undef VAR8
+#undef VAR9
+#undef VAR10
+
+/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
+   symbolic names defined here (which would require too much duplication).
+   FIXME?  */
+enum arm_builtins
+{
+  ARM_BUILTIN_GETWCX,
+  ARM_BUILTIN_SETWCX,
+
+  ARM_BUILTIN_WZERO,
+
+  ARM_BUILTIN_WAVG2BR,
+  ARM_BUILTIN_WAVG2HR,
+  ARM_BUILTIN_WAVG2B,
+  ARM_BUILTIN_WAVG2H,
+
+  ARM_BUILTIN_WACCB,
+  ARM_BUILTIN_WACCH,
+  ARM_BUILTIN_WACCW,
+
+  ARM_BUILTIN_WMACS,
+  ARM_BUILTIN_WMACSZ,
+  ARM_BUILTIN_WMACU,
+  ARM_BUILTIN_WMACUZ,
+
+  ARM_BUILTIN_WSADB,
+  ARM_BUILTIN_WSADBZ,
+  ARM_BUILTIN_WSADH,
+  ARM_BUILTIN_WSADHZ,
+
+  ARM_BUILTIN_WALIGN,
+
+  ARM_BUILTIN_TMIA,
+  ARM_BUILTIN_TMIAPH,
+  ARM_BUILTIN_TMIABB,
+  ARM_BUILTIN_TMIABT,
+  ARM_BUILTIN_TMIATB,
+  ARM_BUILTIN_TMIATT,
+
+  ARM_BUILTIN_TMOVMSKB,
+  ARM_BUILTIN_TMOVMSKH,
+  ARM_BUILTIN_TMOVMSKW,
+
+  ARM_BUILTIN_TBCSTB,
+  ARM_BUILTIN_TBCSTH,
+  ARM_BUILTIN_TBCSTW,
+
+  ARM_BUILTIN_WMADDS,
+  ARM_BUILTIN_WMADDU,
+
+  ARM_BUILTIN_WPACKHSS,
+  ARM_BUILTIN_WPACKWSS,
+  ARM_BUILTIN_WPACKDSS,
+  ARM_BUILTIN_WPACKHUS,
+  ARM_BUILTIN_WPACKWUS,
+  ARM_BUILTIN_WPACKDUS,
+
+  ARM_BUILTIN_WADDB,
+  ARM_BUILTIN_WADDH,
+  ARM_BUILTIN_WADDW,
+  ARM_BUILTIN_WADDSSB,
+  ARM_BUILTIN_WADDSSH,
+  ARM_BUILTIN_WADDSSW,
+  ARM_BUILTIN_WADDUSB,
+  ARM_BUILTIN_WADDUSH,
+  ARM_BUILTIN_WADDUSW,
+  ARM_BUILTIN_WSUBB,
+  ARM_BUILTIN_WSUBH,
+  ARM_BUILTIN_WSUBW,
+  ARM_BUILTIN_WSUBSSB,
+  ARM_BUILTIN_WSUBSSH,
+  ARM_BUILTIN_WSUBSSW,
+  ARM_BUILTIN_WSUBUSB,
+  ARM_BUILTIN_WSUBUSH,
+  ARM_BUILTIN_WSUBUSW,
+
+  ARM_BUILTIN_WAND,
+  ARM_BUILTIN_WANDN,
+  ARM_BUILTIN_WOR,
+  ARM_BUILTIN_WXOR,
+
+  ARM_BUILTIN_WCMPEQB,
+  ARM_BUILTIN_WCMPEQH,
+  ARM_BUILTIN_WCMPEQW,
+  ARM_BUILTIN_WCMPGTUB,
+  ARM_BUILTIN_WCMPGTUH,
+  ARM_BUILTIN_WCMPGTUW,
+  ARM_BUILTIN_WCMPGTSB,
+  ARM_BUILTIN_WCMPGTSH,
+  ARM_BUILTIN_WCMPGTSW,
+
+  ARM_BUILTIN_TEXTRMSB,
+  ARM_BUILTIN_TEXTRMSH,
+  ARM_BUILTIN_TEXTRMSW,
+  ARM_BUILTIN_TEXTRMUB,
+  ARM_BUILTIN_TEXTRMUH,
+  ARM_BUILTIN_TEXTRMUW,
+  ARM_BUILTIN_TINSRB,
+  ARM_BUILTIN_TINSRH,
+  ARM_BUILTIN_TINSRW,
+
+  ARM_BUILTIN_WMAXSW,
+  ARM_BUILTIN_WMAXSH,
+  ARM_BUILTIN_WMAXSB,
+  ARM_BUILTIN_WMAXUW,
+  ARM_BUILTIN_WMAXUH,
+  ARM_BUILTIN_WMAXUB,
+  ARM_BUILTIN_WMINSW,
+  ARM_BUILTIN_WMINSH,
+  ARM_BUILTIN_WMINSB,
+  ARM_BUILTIN_WMINUW,
+  ARM_BUILTIN_WMINUH,
+  ARM_BUILTIN_WMINUB,
+
+  ARM_BUILTIN_WMULUM,
+  ARM_BUILTIN_WMULSM,
+  ARM_BUILTIN_WMULUL,
+
+  ARM_BUILTIN_PSADBH,
+  ARM_BUILTIN_WSHUFH,
+
+  ARM_BUILTIN_WSLLH,
+  ARM_BUILTIN_WSLLW,
+  ARM_BUILTIN_WSLLD,
+  ARM_BUILTIN_WSRAH,
+  ARM_BUILTIN_WSRAW,
+  ARM_BUILTIN_WSRAD,
+  ARM_BUILTIN_WSRLH,
+  ARM_BUILTIN_WSRLW,
+  ARM_BUILTIN_WSRLD,
+  ARM_BUILTIN_WRORH,
+  ARM_BUILTIN_WRORW,
+  ARM_BUILTIN_WRORD,
+  ARM_BUILTIN_WSLLHI,
+  ARM_BUILTIN_WSLLWI,
+  ARM_BUILTIN_WSLLDI,
+  ARM_BUILTIN_WSRAHI,
+  ARM_BUILTIN_WSRAWI,
+  ARM_BUILTIN_WSRADI,
+  ARM_BUILTIN_WSRLHI,
+  ARM_BUILTIN_WSRLWI,
+  ARM_BUILTIN_WSRLDI,
+  ARM_BUILTIN_WRORHI,
+  ARM_BUILTIN_WRORWI,
+  ARM_BUILTIN_WRORDI,
+
+  ARM_BUILTIN_WUNPCKIHB,
+  ARM_BUILTIN_WUNPCKIHH,
+  ARM_BUILTIN_WUNPCKIHW,
+  ARM_BUILTIN_WUNPCKILB,
+  ARM_BUILTIN_WUNPCKILH,
+  ARM_BUILTIN_WUNPCKILW,
+
+  ARM_BUILTIN_WUNPCKEHSB,
+  ARM_BUILTIN_WUNPCKEHSH,
+  ARM_BUILTIN_WUNPCKEHSW,
+  ARM_BUILTIN_WUNPCKEHUB,
+  ARM_BUILTIN_WUNPCKEHUH,
+  ARM_BUILTIN_WUNPCKEHUW,
+  ARM_BUILTIN_WUNPCKELSB,
+  ARM_BUILTIN_WUNPCKELSH,
+  ARM_BUILTIN_WUNPCKELSW,
+  ARM_BUILTIN_WUNPCKELUB,
+  ARM_BUILTIN_WUNPCKELUH,
+  ARM_BUILTIN_WUNPCKELUW,
+
+  ARM_BUILTIN_THREAD_POINTER,
+
+  ARM_BUILTIN_NEON_BASE,
+
+  ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
 };
 
-/* Set up all the iWMMXt builtins.  This is
-   not called if TARGET_IWMMXT is zero.  */
+static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
 
 static void
-arm_init_iwmmxt_builtins (void)
+arm_init_neon_builtins (void)
 {
-  const struct builtin_description * d;
-  size_t i;
-  tree endlink = void_list_node;
+  unsigned int i, fcode;
+  tree decl;
 
-  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
-  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
-  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
+  tree neon_intQI_type_node;
+  tree neon_intHI_type_node;
+  tree neon_polyQI_type_node;
+  tree neon_polyHI_type_node;
+  tree neon_intSI_type_node;
+  tree neon_intDI_type_node;
+  tree neon_float_type_node;
 
-  tree int_ftype_int
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, integer_type_node, endlink));
-  tree v8qi_ftype_v8qi_v8qi_int
-    = build_function_type (V8QI_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      tree_cons (NULL_TREE, V8QI_type_node,
-						 tree_cons (NULL_TREE,
-							    integer_type_node,
-							    endlink))));
-  tree v4hi_ftype_v4hi_int
-    = build_function_type (V4HI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree v2si_ftype_v2si_int
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree v2si_ftype_di_di
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, long_long_integer_type_node,
-				      tree_cons (NULL_TREE, long_long_integer_type_node,
-						 endlink)));
-  tree di_ftype_di_int
-    = build_function_type (long_long_integer_type_node,
-			   tree_cons (NULL_TREE, long_long_integer_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree di_ftype_di_int_int
-    = build_function_type (long_long_integer_type_node,
-			   tree_cons (NULL_TREE, long_long_integer_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 tree_cons (NULL_TREE,
-							    integer_type_node,
-							    endlink))));
-  tree int_ftype_v8qi
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      endlink));
-  tree int_ftype_v4hi
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      endlink));
-  tree int_ftype_v2si
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      endlink));
-  tree int_ftype_v8qi_int
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree int_ftype_v4hi_int
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree int_ftype_v2si_int
-    = build_function_type (integer_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree v8qi_ftype_v8qi_int_int
-    = build_function_type (V8QI_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 tree_cons (NULL_TREE,
-							    integer_type_node,
-							    endlink))));
-  tree v4hi_ftype_v4hi_int_int
-    = build_function_type (V4HI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 tree_cons (NULL_TREE,
-							    integer_type_node,
-							    endlink))));
-  tree v2si_ftype_v2si_int_int
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 tree_cons (NULL_TREE,
-							    integer_type_node,
-							    endlink))));
-  /* Miscellaneous.  */
-  tree v8qi_ftype_v4hi_v4hi
-    = build_function_type (V8QI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, V4HI_type_node,
-						 endlink)));
-  tree v4hi_ftype_v2si_v2si
-    = build_function_type (V4HI_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      tree_cons (NULL_TREE, V2SI_type_node,
-						 endlink)));
-  tree v2si_ftype_v4hi_v4hi
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, V4HI_type_node,
-						 endlink)));
-  tree v2si_ftype_v8qi_v8qi
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      tree_cons (NULL_TREE, V8QI_type_node,
-						 endlink)));
-  tree v4hi_ftype_v4hi_di
-    = build_function_type (V4HI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE,
-						 long_long_integer_type_node,
-						 endlink)));
-  tree v2si_ftype_v2si_di
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      tree_cons (NULL_TREE,
-						 long_long_integer_type_node,
-						 endlink)));
-  tree void_ftype_int_int
-    = build_function_type (void_type_node,
-			   tree_cons (NULL_TREE, integer_type_node,
-				      tree_cons (NULL_TREE, integer_type_node,
-						 endlink)));
-  tree di_ftype_void
-    = build_function_type (long_long_unsigned_type_node, endlink);
-  tree di_ftype_v8qi
-    = build_function_type (long_long_integer_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      endlink));
-  tree di_ftype_v4hi
-    = build_function_type (long_long_integer_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      endlink));
-  tree di_ftype_v2si
-    = build_function_type (long_long_integer_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      endlink));
-  tree v2si_ftype_v4hi
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      endlink));
-  tree v4hi_ftype_v8qi
-    = build_function_type (V4HI_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      endlink));
-
-  tree di_ftype_di_v4hi_v4hi
-    = build_function_type (long_long_unsigned_type_node,
-			   tree_cons (NULL_TREE,
-				      long_long_unsigned_type_node,
-				      tree_cons (NULL_TREE, V4HI_type_node,
-						 tree_cons (NULL_TREE,
-							    V4HI_type_node,
-							    endlink))));
+  tree intQI_pointer_node;
+  tree intHI_pointer_node;
+  tree intSI_pointer_node;
+  tree intDI_pointer_node;
+  tree float_pointer_node;
 
-  tree di_ftype_v4hi_v4hi
-    = build_function_type (long_long_unsigned_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, V4HI_type_node,
-						 endlink)));
+  tree const_intQI_node;
+  tree const_intHI_node;
+  tree const_intSI_node;
+  tree const_intDI_node;
+  tree const_float_node;
 
-  /* Normal vector binops.  */
-  tree v8qi_ftype_v8qi_v8qi
-    = build_function_type (V8QI_type_node,
-			   tree_cons (NULL_TREE, V8QI_type_node,
-				      tree_cons (NULL_TREE, V8QI_type_node,
-						 endlink)));
-  tree v4hi_ftype_v4hi_v4hi
-    = build_function_type (V4HI_type_node,
-			   tree_cons (NULL_TREE, V4HI_type_node,
-				      tree_cons (NULL_TREE, V4HI_type_node,
-						 endlink)));
-  tree v2si_ftype_v2si_v2si
-    = build_function_type (V2SI_type_node,
-			   tree_cons (NULL_TREE, V2SI_type_node,
-				      tree_cons (NULL_TREE, V2SI_type_node,
-						 endlink)));
-  tree di_ftype_di_di
-    = build_function_type (long_long_unsigned_type_node,
-			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
-				      tree_cons (NULL_TREE,
-						 long_long_unsigned_type_node,
-						 endlink)));
+  tree const_intQI_pointer_node;
+  tree const_intHI_pointer_node;
+  tree const_intSI_pointer_node;
+  tree const_intDI_pointer_node;
+  tree const_float_pointer_node;
 
-  /* Add all builtins that are more or less simple operations on two
-     operands.  */
-  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
-    {
-      /* Use one of the operands; the target can have a different mode for
-	 mask-generating compares.  */
-      enum machine_mode mode;
-      tree type;
+  tree V8QI_type_node;
+  tree V4HI_type_node;
+  tree V2SI_type_node;
+  tree V2SF_type_node;
+  tree V16QI_type_node;
+  tree V8HI_type_node;
+  tree V4SI_type_node;
+  tree V4SF_type_node;
+  tree V2DI_type_node;
 
-      if (d->name == 0)
-	continue;
+  tree intUQI_type_node;
+  tree intUHI_type_node;
+  tree intUSI_type_node;
+  tree intUDI_type_node;
 
-      mode = insn_data[d->icode].operand[1].mode;
+  tree intEI_type_node;
+  tree intOI_type_node;
+  tree intCI_type_node;
+  tree intXI_type_node;
 
-      switch (mode)
-	{
-	case V8QImode:
-	  type = v8qi_ftype_v8qi_v8qi;
-	  break;
-	case V4HImode:
-	  type = v4hi_ftype_v4hi_v4hi;
-	  break;
-	case V2SImode:
-	  type = v2si_ftype_v2si_v2si;
-	  break;
-	case DImode:
-	  type = di_ftype_di_di;
-	  break;
+  tree V8QI_pointer_node;
+  tree V4HI_pointer_node;
+  tree V2SI_pointer_node;
+  tree V2SF_pointer_node;
+  tree V16QI_pointer_node;
+  tree V8HI_pointer_node;
+  tree V4SI_pointer_node;
+  tree V4SF_pointer_node;
+  tree V2DI_pointer_node;
 
-	default:
-	  gcc_unreachable ();
-	}
+  tree void_ftype_pv8qi_v8qi_v8qi;
+  tree void_ftype_pv4hi_v4hi_v4hi;
+  tree void_ftype_pv2si_v2si_v2si;
+  tree void_ftype_pv2sf_v2sf_v2sf;
+  tree void_ftype_pdi_di_di;
+  tree void_ftype_pv16qi_v16qi_v16qi;
+  tree void_ftype_pv8hi_v8hi_v8hi;
+  tree void_ftype_pv4si_v4si_v4si;
+  tree void_ftype_pv4sf_v4sf_v4sf;
+  tree void_ftype_pv2di_v2di_v2di;
 
-      def_mbuiltin (d->mask, d->name, type, d->code);
-    }
+  tree reinterp_ftype_dreg[5][5];
+  tree reinterp_ftype_qreg[5][5];
+  tree dreg_types[5], qreg_types[5];
 
-  /* Add the remaining MMX insns with somewhat more complicated types.  */
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
-
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
-  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
-}
+  /* Create distinguished type nodes for NEON vector element types,
+     and pointers to values of such types, so we can detect them later.  */
+  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
+  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
+  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
+  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
+  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
+  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
+  neon_float_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
+  layout_type (neon_float_type_node);
 
-static void
-arm_init_tls_builtins (void)
-{
-  tree ftype, decl;
-
-  ftype = build_function_type (ptr_type_node, void_list_node);
-  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
-			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
-			       NULL, NULL_TREE);
-  TREE_NOTHROW (decl) = 1;
-  TREE_READONLY (decl) = 1;
-}
-
-enum neon_builtin_type_bits {
-  T_V8QI  = 0x0001,
-  T_V4HI  = 0x0002,
-  T_V2SI  = 0x0004,
-  T_V2SF  = 0x0008,
-  T_DI    = 0x0010,
-  T_V16QI = 0x0020,
-  T_V8HI  = 0x0040,
-  T_V4SI  = 0x0080,
-  T_V4SF  = 0x0100,
-  T_V2DI  = 0x0200,
-  T_TI	  = 0x0400,
-  T_EI	  = 0x0800,
-  T_OI	  = 0x1000
-};
-
-#define v8qi_UP  T_V8QI
-#define v4hi_UP  T_V4HI
-#define v2si_UP  T_V2SI
-#define v2sf_UP  T_V2SF
-#define di_UP    T_DI
-#define v16qi_UP T_V16QI
-#define v8hi_UP  T_V8HI
-#define v4si_UP  T_V4SI
-#define v4sf_UP  T_V4SF
-#define v2di_UP  T_V2DI
-#define ti_UP	 T_TI
-#define ei_UP	 T_EI
-#define oi_UP	 T_OI
-
-#define UP(X) X##_UP
-
-#define T_MAX 13
-
-typedef enum {
-  NEON_BINOP,
-  NEON_TERNOP,
-  NEON_UNOP,
-  NEON_GETLANE,
-  NEON_SETLANE,
-  NEON_CREATE,
-  NEON_DUP,
-  NEON_DUPLANE,
-  NEON_COMBINE,
-  NEON_SPLIT,
-  NEON_LANEMUL,
-  NEON_LANEMULL,
-  NEON_LANEMULH,
-  NEON_LANEMAC,
-  NEON_SCALARMUL,
-  NEON_SCALARMULL,
-  NEON_SCALARMULH,
-  NEON_SCALARMAC,
-  NEON_CONVERT,
-  NEON_FIXCONV,
-  NEON_SELECT,
-  NEON_RESULTPAIR,
-  NEON_REINTERP,
-  NEON_VTBL,
-  NEON_VTBX,
-  NEON_LOAD1,
-  NEON_LOAD1LANE,
-  NEON_STORE1,
-  NEON_STORE1LANE,
-  NEON_LOADSTRUCT,
-  NEON_LOADSTRUCTLANE,
-  NEON_STORESTRUCT,
-  NEON_STORESTRUCTLANE,
-  NEON_LOGICBINOP,
-  NEON_SHIFTINSERT,
-  NEON_SHIFTIMM,
-  NEON_SHIFTACC
-} neon_itype;
-
-typedef struct {
-  const char *name;
-  const neon_itype itype;
-  const int bits;
-  const enum insn_code codes[T_MAX];
-  const unsigned int num_vars;
-  unsigned int base_fcode;
-} neon_builtin_datum;
-
-#define CF(N,X) CODE_FOR_neon_##N##X
-
-#define VAR1(T, N, A) \
-  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
-#define VAR2(T, N, A, B) \
-  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
-#define VAR3(T, N, A, B, C) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
-  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
-#define VAR4(T, N, A, B, C, D) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
-#define VAR5(T, N, A, B, C, D, E) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
-#define VAR6(T, N, A, B, C, D, E, F) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
-#define VAR7(T, N, A, B, C, D, E, F, G) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G) }, 7, 0
-#define VAR8(T, N, A, B, C, D, E, F, G, H) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
-                | UP (H), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G), CF (N, H) }, 8, 0
-#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
-                | UP (H) | UP (I), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
-#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
-  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
-                | UP (H) | UP (I) | UP (J), \
-  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
-    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
-
-/* The mode entries in the following table correspond to the "key" type of the
-   instruction variant, i.e. equivalent to that which would be specified after
-   the assembler mnemonic, which usually refers to the last vector operand.
-   (Signed/unsigned/polynomial types are not differentiated between though, and
-   are all mapped onto the same mode for a given element size.) The modes
-   listed per instruction should be the same as those defined for that
-   instruction's pattern in neon.md.
-   WARNING: Variants should be listed in the same increasing order as
-   neon_builtin_type_bits.  */
-
-static neon_builtin_datum neon_builtin_data[] =
-{
-  { VAR10 (BINOP, vadd,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
-  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
-  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
-  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
-  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
-  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
-  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
-  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
-  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
-  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
-  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
-  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
-  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
-  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
-  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
-  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
-  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
-  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR10 (BINOP, vsub,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
-  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
-  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
-  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR2 (BINOP, vcage, v2sf, v4sf) },
-  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
-  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
-  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
-  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
-  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
-  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
-  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
-  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
-  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
-  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
-  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
-  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
-  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
-  /* FIXME: vget_lane supports more variants than this!  */
-  { VAR10 (GETLANE, vget_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (SETLANE, vset_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR10 (DUP, vdup_n,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (DUPLANE, vdup_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
-  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
-  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
-  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
-  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
-  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
-  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
-  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
-  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
-  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
-  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
-  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
-  { VAR10 (BINOP, vext,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
-  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
-  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
-  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
-  { VAR10 (SELECT, vbsl,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR1 (VTBL, vtbl1, v8qi) },
-  { VAR1 (VTBL, vtbl2, v8qi) },
-  { VAR1 (VTBL, vtbl3, v8qi) },
-  { VAR1 (VTBL, vtbl4, v8qi) },
-  { VAR1 (VTBX, vtbx1, v8qi) },
-  { VAR1 (VTBX, vtbx2, v8qi) },
-  { VAR1 (VTBX, vtbx3, v8qi) },
-  { VAR1 (VTBX, vtbx4, v8qi) },
-  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
-  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOAD1, vld1,
-           v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOAD1LANE, vld1_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOAD1, vld1_dup,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (STORE1, vst1,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (STORE1LANE, vst1_lane,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR9 (LOADSTRUCT,
-	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (LOADSTRUCTLANE, vld2_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR9 (STORESTRUCT, vst2,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (STORESTRUCTLANE, vst2_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR9 (LOADSTRUCT,
-	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (LOADSTRUCTLANE, vld3_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR9 (STORESTRUCT, vst3,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (STORESTRUCTLANE, vst3_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR9 (LOADSTRUCT, vld4,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (LOADSTRUCTLANE, vld4_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
-  { VAR9 (STORESTRUCT, vst4,
-	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
-  { VAR7 (STORESTRUCTLANE, vst4_lane,
-	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
-  { VAR10 (LOGICBINOP, vand,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOGICBINOP, vorr,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (BINOP, veor,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOGICBINOP, vbic,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
-  { VAR10 (LOGICBINOP, vorn,
-	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
-};
-
-#undef CF
-#undef VAR1
-#undef VAR2
-#undef VAR3
-#undef VAR4
-#undef VAR5
-#undef VAR6
-#undef VAR7
-#undef VAR8
-#undef VAR9
-#undef VAR10
-
-static void
-arm_init_neon_builtins (void)
-{
-  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
-
-  tree neon_intQI_type_node;
-  tree neon_intHI_type_node;
-  tree neon_polyQI_type_node;
-  tree neon_polyHI_type_node;
-  tree neon_intSI_type_node;
-  tree neon_intDI_type_node;
-  tree neon_float_type_node;
-
-  tree intQI_pointer_node;
-  tree intHI_pointer_node;
-  tree intSI_pointer_node;
-  tree intDI_pointer_node;
-  tree float_pointer_node;
-
-  tree const_intQI_node;
-  tree const_intHI_node;
-  tree const_intSI_node;
-  tree const_intDI_node;
-  tree const_float_node;
-
-  tree const_intQI_pointer_node;
-  tree const_intHI_pointer_node;
-  tree const_intSI_pointer_node;
-  tree const_intDI_pointer_node;
-  tree const_float_pointer_node;
-
-  tree V8QI_type_node;
-  tree V4HI_type_node;
-  tree V2SI_type_node;
-  tree V2SF_type_node;
-  tree V16QI_type_node;
-  tree V8HI_type_node;
-  tree V4SI_type_node;
-  tree V4SF_type_node;
-  tree V2DI_type_node;
-
-  tree intUQI_type_node;
-  tree intUHI_type_node;
-  tree intUSI_type_node;
-  tree intUDI_type_node;
-
-  tree intEI_type_node;
-  tree intOI_type_node;
-  tree intCI_type_node;
-  tree intXI_type_node;
-
-  tree V8QI_pointer_node;
-  tree V4HI_pointer_node;
-  tree V2SI_pointer_node;
-  tree V2SF_pointer_node;
-  tree V16QI_pointer_node;
-  tree V8HI_pointer_node;
-  tree V4SI_pointer_node;
-  tree V4SF_pointer_node;
-  tree V2DI_pointer_node;
-
-  tree void_ftype_pv8qi_v8qi_v8qi;
-  tree void_ftype_pv4hi_v4hi_v4hi;
-  tree void_ftype_pv2si_v2si_v2si;
-  tree void_ftype_pv2sf_v2sf_v2sf;
-  tree void_ftype_pdi_di_di;
-  tree void_ftype_pv16qi_v16qi_v16qi;
-  tree void_ftype_pv8hi_v8hi_v8hi;
-  tree void_ftype_pv4si_v4si_v4si;
-  tree void_ftype_pv4sf_v4sf_v4sf;
-  tree void_ftype_pv2di_v2di_v2di;
-
-  tree reinterp_ftype_dreg[5][5];
-  tree reinterp_ftype_qreg[5][5];
-  tree dreg_types[5], qreg_types[5];
-
-  /* Create distinguished type nodes for NEON vector element types,
-     and pointers to values of such types, so we can detect them later.  */
-  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
-  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
-  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
-  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
-  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
-  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
-  neon_float_type_node = make_node (REAL_TYPE);
-  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
-  layout_type (neon_float_type_node);
-
-  /* Define typedefs which exactly correspond to the modes we are basing vector
-     types on.  If you change these names you'll need to change
-     the table used by arm_mangle_type too.  */
-  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
-					     "__builtin_neon_qi");
-  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
-					     "__builtin_neon_hi");
-  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
-					     "__builtin_neon_si");
-  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
-					     "__builtin_neon_sf");
-  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
-					     "__builtin_neon_di");
-  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
-					     "__builtin_neon_poly8");
-  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
-					     "__builtin_neon_poly16");
+  /* Define typedefs which exactly correspond to the modes we are basing vector
+     types on.  If you change these names you'll need to change
+     the table used by arm_mangle_type too.  */
+  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
+					     "__builtin_neon_qi");
+  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
+					     "__builtin_neon_hi");
+  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
+					     "__builtin_neon_si");
+  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
+					     "__builtin_neon_sf");
+  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
+					     "__builtin_neon_di");
+  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
+					     "__builtin_neon_poly8");
+  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
+					     "__builtin_neon_poly16");
 
   intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
   intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
@@ -18880,264 +19481,752 @@
   qreg_types[3] = V4SF_type_node;
   qreg_types[4] = V2DI_type_node;
 
-  for (i = 0; i < 5; i++)
-    {
-      int j;
-      for (j = 0; j < 5; j++)
-        {
-          reinterp_ftype_dreg[i][j]
-            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
-          reinterp_ftype_qreg[i][j]
-            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
-        }
-    }
+  for (i = 0; i < 5; i++)
+    {
+      int j;
+      for (j = 0; j < 5; j++)
+        {
+          reinterp_ftype_dreg[i][j]
+            = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
+          reinterp_ftype_qreg[i][j]
+            = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
+        }
+    }
+
+  for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
+       i < ARRAY_SIZE (neon_builtin_data);
+       i++, fcode++)
+    {
+      neon_builtin_datum *d = &neon_builtin_data[i];
+
+      const char* const modenames[] = {
+	"v8qi", "v4hi", "v2si", "v2sf", "di",
+	"v16qi", "v8hi", "v4si", "v4sf", "v2di",
+	"ti", "ei", "oi"
+      };
+      char namebuf[60];
+      tree ftype = NULL;
+      int is_load = 0, is_store = 0;
+
+      gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
+
+      d->fcode = fcode;
+
+      switch (d->itype)
+	{
+	case NEON_LOAD1:
+	case NEON_LOAD1LANE:
+	case NEON_LOADSTRUCT:
+	case NEON_LOADSTRUCTLANE:
+	  is_load = 1;
+	  /* Fall through.  */
+	case NEON_STORE1:
+	case NEON_STORE1LANE:
+	case NEON_STORESTRUCT:
+	case NEON_STORESTRUCTLANE:
+	  if (!is_load)
+	    is_store = 1;
+	  /* Fall through.  */
+	case NEON_UNOP:
+	case NEON_BINOP:
+	case NEON_LOGICBINOP:
+	case NEON_SHIFTINSERT:
+	case NEON_TERNOP:
+	case NEON_GETLANE:
+	case NEON_SETLANE:
+	case NEON_CREATE:
+	case NEON_DUP:
+	case NEON_DUPLANE:
+	case NEON_SHIFTIMM:
+	case NEON_SHIFTACC:
+	case NEON_COMBINE:
+	case NEON_SPLIT:
+	case NEON_CONVERT:
+	case NEON_FIXCONV:
+	case NEON_LANEMUL:
+	case NEON_LANEMULL:
+	case NEON_LANEMULH:
+	case NEON_LANEMAC:
+	case NEON_SCALARMUL:
+	case NEON_SCALARMULL:
+	case NEON_SCALARMULH:
+	case NEON_SCALARMAC:
+	case NEON_SELECT:
+	case NEON_VTBL:
+	case NEON_VTBX:
+	  {
+	    int k;
+	    tree return_type = void_type_node, args = void_list_node;
+
+	    /* Build a function type directly from the insn_data for
+	       this builtin.  The build_function_type() function takes
+	       care of removing duplicates for us.  */
+	    for (k = insn_data[d->code].n_operands - 1; k >= 0; k--)
+	      {
+		tree eltype;
+
+		if (is_load && k == 1)
+		  {
+		    /* Neon load patterns always have the memory
+		       operand in the operand 1 position.  */
+		    gcc_assert (insn_data[d->code].operand[k].predicate
+				== neon_struct_operand);
+
+		    switch (d->mode)
+		      {
+		      case T_V8QI:
+		      case T_V16QI:
+			eltype = const_intQI_pointer_node;
+			break;
+
+		      case T_V4HI:
+		      case T_V8HI:
+			eltype = const_intHI_pointer_node;
+			break;
+
+		      case T_V2SI:
+		      case T_V4SI:
+			eltype = const_intSI_pointer_node;
+			break;
+
+		      case T_V2SF:
+		      case T_V4SF:
+			eltype = const_float_pointer_node;
+			break;
+
+		      case T_DI:
+		      case T_V2DI:
+			eltype = const_intDI_pointer_node;
+			break;
+
+		      default: gcc_unreachable ();
+		      }
+		  }
+		else if (is_store && k == 0)
+		  {
+		    /* Similarly, Neon store patterns use operand 0 as
+		       the memory location to store to.  */
+		    gcc_assert (insn_data[d->code].operand[k].predicate
+				== neon_struct_operand);
+
+		    switch (d->mode)
+		      {
+		      case T_V8QI:
+		      case T_V16QI:
+			eltype = intQI_pointer_node;
+			break;
+
+		      case T_V4HI:
+		      case T_V8HI:
+			eltype = intHI_pointer_node;
+			break;
+
+		      case T_V2SI:
+		      case T_V4SI:
+			eltype = intSI_pointer_node;
+			break;
+
+		      case T_V2SF:
+		      case T_V4SF:
+			eltype = float_pointer_node;
+			break;
+
+		      case T_DI:
+		      case T_V2DI:
+			eltype = intDI_pointer_node;
+			break;
+
+		      default: gcc_unreachable ();
+		      }
+		  }
+		else
+		  {
+		    switch (insn_data[d->code].operand[k].mode)
+		      {
+		      case VOIDmode: eltype = void_type_node; break;
+			/* Scalars.  */
+		      case QImode: eltype = neon_intQI_type_node; break;
+		      case HImode: eltype = neon_intHI_type_node; break;
+		      case SImode: eltype = neon_intSI_type_node; break;
+		      case SFmode: eltype = neon_float_type_node; break;
+		      case DImode: eltype = neon_intDI_type_node; break;
+		      case TImode: eltype = intTI_type_node; break;
+		      case EImode: eltype = intEI_type_node; break;
+		      case OImode: eltype = intOI_type_node; break;
+		      case CImode: eltype = intCI_type_node; break;
+		      case XImode: eltype = intXI_type_node; break;
+			/* 64-bit vectors.  */
+		      case V8QImode: eltype = V8QI_type_node; break;
+		      case V4HImode: eltype = V4HI_type_node; break;
+		      case V2SImode: eltype = V2SI_type_node; break;
+		      case V2SFmode: eltype = V2SF_type_node; break;
+			/* 128-bit vectors.  */
+		      case V16QImode: eltype = V16QI_type_node; break;
+		      case V8HImode: eltype = V8HI_type_node; break;
+		      case V4SImode: eltype = V4SI_type_node; break;
+		      case V4SFmode: eltype = V4SF_type_node; break;
+		      case V2DImode: eltype = V2DI_type_node; break;
+		      default: gcc_unreachable ();
+		      }
+		  }
+
+		if (k == 0 && !is_store)
+		  return_type = eltype;
+		else
+		  args = tree_cons (NULL_TREE, eltype, args);
+	      }
+
+	    ftype = build_function_type (return_type, args);
+	  }
+	  break;
+
+	case NEON_RESULTPAIR:
+	  {
+	    switch (insn_data[d->code].operand[1].mode)
+	      {
+	      case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
+	      case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
+	      case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
+	      case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
+	      case DImode: ftype = void_ftype_pdi_di_di; break;
+	      case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
+	      case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
+	      case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
+	      case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
+	      case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
+	      default: gcc_unreachable ();
+	      }
+	  }
+	  break;
+
+	case NEON_REINTERP:
+	  {
+	    /* We iterate over 5 doubleword types, then 5 quadword
+	       types.  */
+	    int rhs = d->mode % 5;
+	    switch (insn_data[d->code].operand[0].mode)
+	      {
+	      case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
+	      case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
+	      case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
+	      case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
+	      case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
+	      case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
+	      case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
+	      case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
+	      case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
+	      case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
+	      default: gcc_unreachable ();
+	      }
+	  }
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
+
+      gcc_assert (ftype != NULL);
+
+      sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
+
+      decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
+				   NULL_TREE);
+      arm_builtin_decls[fcode] = decl;
+    }
+}
+
+#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
+  do									\
+    {									\
+      if ((MASK) & insn_flags)						\
+	{								\
+	  tree bdecl;							\
+	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
+					BUILT_IN_MD, NULL, NULL_TREE);	\
+	  arm_builtin_decls[CODE] = bdecl;				\
+	}								\
+    }									\
+  while (0)
+  
+struct builtin_description
+{
+  const unsigned int       mask;
+  const enum insn_code     icode;
+  const char * const       name;
+  const enum arm_builtins  code;
+  const enum rtx_code      comparison;
+  const unsigned int       flag;
+};
+  
+static const struct builtin_description bdesc_2arg[] =
+{
+#define IWMMXT_BUILTIN(code, string, builtin) \
+  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
+    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+  
+  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
+  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
+  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
+  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
+  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
+  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
+  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
+  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
+  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
+  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
+  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
+  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
+  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
+  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
+  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
+  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
+  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
+  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
+  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
+  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
+  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
+  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
+  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
+  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
+  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
+  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
+  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
+  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
+  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
+  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
+  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
+  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
+  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
+  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
+  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
+  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
+  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
+  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
+  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
+  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
+  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
+  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
+  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
+  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
+  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
+  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
+  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
+  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
+  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
+  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
+  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
+  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
+  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
+  
+#define IWMMXT_BUILTIN2(code, builtin) \
+  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+  
+  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
+  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
+  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
+  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
+  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
+  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
+  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
+  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
+  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
+  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
+  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
+  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
+  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt,  WSRLDI)
+  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
+  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
+  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
+  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
+  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
+  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt,  WSRADI)
+  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
+  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
+  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
+  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
+  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
+  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
+  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
+  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
+};
+  
+static const struct builtin_description bdesc_1arg[] =
+{
+  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
+  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
+  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
+  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
+  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
+  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
+};
+  
+/* Set up all the iWMMXt builtins.  This is not called if
+   TARGET_IWMMXT is zero.  */
+
+static void
+arm_init_iwmmxt_builtins (void)
+{
+  const struct builtin_description * d;
+  size_t i;
+  tree endlink = void_list_node;
+
+  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
+
+  tree int_ftype_int
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, integer_type_node, endlink));
+  tree v8qi_ftype_v8qi_v8qi_int
+    = build_function_type (V8QI_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      tree_cons (NULL_TREE, V8QI_type_node,
+						 tree_cons (NULL_TREE,
+							    integer_type_node,
+							    endlink))));
+  tree v4hi_ftype_v4hi_int
+    = build_function_type (V4HI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree v2si_ftype_v2si_int
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree v2si_ftype_di_di
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, long_long_integer_type_node,
+				      tree_cons (NULL_TREE,
+						 long_long_integer_type_node,
+						 endlink)));
+  tree di_ftype_di_int
+    = build_function_type (long_long_integer_type_node,
+			   tree_cons (NULL_TREE, long_long_integer_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree di_ftype_di_int_int
+    = build_function_type (long_long_integer_type_node,
+			   tree_cons (NULL_TREE, long_long_integer_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 tree_cons (NULL_TREE,
+							    integer_type_node,
+							    endlink))));
+  tree int_ftype_v8qi
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      endlink));
+  tree int_ftype_v4hi
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      endlink));
+  tree int_ftype_v2si
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      endlink));
+  tree int_ftype_v8qi_int
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree int_ftype_v4hi_int
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree int_ftype_v2si_int
+    = build_function_type (integer_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree v8qi_ftype_v8qi_int_int
+    = build_function_type (V8QI_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 tree_cons (NULL_TREE,
+							    integer_type_node,
+							    endlink))));
+  tree v4hi_ftype_v4hi_int_int
+    = build_function_type (V4HI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 tree_cons (NULL_TREE,
+							    integer_type_node,
+							    endlink))));
+  tree v2si_ftype_v2si_int_int
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 tree_cons (NULL_TREE,
+							    integer_type_node,
+							    endlink))));
+  /* Miscellaneous.  */
+  tree v8qi_ftype_v4hi_v4hi
+    = build_function_type (V8QI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, V4HI_type_node,
+						 endlink)));
+  tree v4hi_ftype_v2si_v2si
+    = build_function_type (V4HI_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      tree_cons (NULL_TREE, V2SI_type_node,
+						 endlink)));
+  tree v2si_ftype_v4hi_v4hi
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, V4HI_type_node,
+						 endlink)));
+  tree v2si_ftype_v8qi_v8qi
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      tree_cons (NULL_TREE, V8QI_type_node,
+						 endlink)));
+  tree v4hi_ftype_v4hi_di
+    = build_function_type (V4HI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE,
+						 long_long_integer_type_node,
+						 endlink)));
+  tree v2si_ftype_v2si_di
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      tree_cons (NULL_TREE,
+						 long_long_integer_type_node,
+						 endlink)));
+  tree void_ftype_int_int
+    = build_function_type (void_type_node,
+			   tree_cons (NULL_TREE, integer_type_node,
+				      tree_cons (NULL_TREE, integer_type_node,
+						 endlink)));
+  tree di_ftype_void
+    = build_function_type (long_long_unsigned_type_node, endlink);
+  tree di_ftype_v8qi
+    = build_function_type (long_long_integer_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      endlink));
+  tree di_ftype_v4hi
+    = build_function_type (long_long_integer_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      endlink));
+  tree di_ftype_v2si
+    = build_function_type (long_long_integer_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      endlink));
+  tree v2si_ftype_v4hi
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      endlink));
+  tree v4hi_ftype_v8qi
+    = build_function_type (V4HI_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      endlink));
+
+  tree di_ftype_di_v4hi_v4hi
+    = build_function_type (long_long_unsigned_type_node,
+			   tree_cons (NULL_TREE,
+				      long_long_unsigned_type_node,
+				      tree_cons (NULL_TREE, V4HI_type_node,
+						 tree_cons (NULL_TREE,
+							    V4HI_type_node,
+							    endlink))));
 
-  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
+  tree di_ftype_v4hi_v4hi
+    = build_function_type (long_long_unsigned_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, V4HI_type_node,
+						 endlink)));
+  
+  /* Normal vector binops.  */
+  tree v8qi_ftype_v8qi_v8qi
+    = build_function_type (V8QI_type_node,
+			   tree_cons (NULL_TREE, V8QI_type_node,
+				      tree_cons (NULL_TREE, V8QI_type_node,
+						 endlink)));
+  tree v4hi_ftype_v4hi_v4hi
+    = build_function_type (V4HI_type_node,
+			   tree_cons (NULL_TREE, V4HI_type_node,
+				      tree_cons (NULL_TREE, V4HI_type_node,
+						 endlink)));
+  tree v2si_ftype_v2si_v2si
+    = build_function_type (V2SI_type_node,
+			   tree_cons (NULL_TREE, V2SI_type_node,
+				      tree_cons (NULL_TREE, V2SI_type_node,
+						 endlink)));
+  tree di_ftype_di_di
+    = build_function_type (long_long_unsigned_type_node,
+			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
+				      tree_cons (NULL_TREE,
+						 long_long_unsigned_type_node,
+						 endlink)));
+  
+  /* Add all builtins that are more or less simple operations on two
+     operands.  */
+  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
     {
-      neon_builtin_datum *d = &neon_builtin_data[i];
-      unsigned int j, codeidx = 0;
-
-      d->base_fcode = fcode;
-
-      for (j = 0; j < T_MAX; j++)
-	{
-	  const char* const modenames[] = {
-	    "v8qi", "v4hi", "v2si", "v2sf", "di",
-	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
-	  };
-	  char namebuf[60];
-	  tree ftype = NULL;
-	  enum insn_code icode;
-	  int is_load = 0, is_store = 0;
-
-          if ((d->bits & (1 << j)) == 0)
-            continue;
-
-          icode = d->codes[codeidx++];
-
-          switch (d->itype)
-            {
-	    case NEON_LOAD1:
-	    case NEON_LOAD1LANE:
-	    case NEON_LOADSTRUCT:
-	    case NEON_LOADSTRUCTLANE:
-	      is_load = 1;
-	      /* Fall through.  */
-	    case NEON_STORE1:
-	    case NEON_STORE1LANE:
-	    case NEON_STORESTRUCT:
-	    case NEON_STORESTRUCTLANE:
-	      if (!is_load)
-	        is_store = 1;
-	      /* Fall through.  */
-            case NEON_UNOP:
-	    case NEON_BINOP:
-	    case NEON_LOGICBINOP:
-	    case NEON_SHIFTINSERT:
-	    case NEON_TERNOP:
-	    case NEON_GETLANE:
-	    case NEON_SETLANE:
-	    case NEON_CREATE:
-	    case NEON_DUP:
-	    case NEON_DUPLANE:
-	    case NEON_SHIFTIMM:
-	    case NEON_SHIFTACC:
-	    case NEON_COMBINE:
-	    case NEON_SPLIT:
-	    case NEON_CONVERT:
-	    case NEON_FIXCONV:
-	    case NEON_LANEMUL:
-	    case NEON_LANEMULL:
-	    case NEON_LANEMULH:
-	    case NEON_LANEMAC:
-	    case NEON_SCALARMUL:
-	    case NEON_SCALARMULL:
-	    case NEON_SCALARMULH:
-	    case NEON_SCALARMAC:
-	    case NEON_SELECT:
-	    case NEON_VTBL:
-	    case NEON_VTBX:
-	      {
-		int k;
-		tree return_type = void_type_node, args = void_list_node;
-
-		/* Build a function type directly from the insn_data for this
-		   builtin.  The build_function_type() function takes care of
-		   removing duplicates for us.  */
-		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
-		  {
-		    tree eltype;
-
-		    if (is_load && k == 1)
-		      {
-		        /* Neon load patterns always have the memory operand
-			   (a SImode pointer) in the operand 1 position.  We
-			   want a const pointer to the element type in that
-			   position.  */
-		        gcc_assert (insn_data[icode].operand[k].mode == SImode);
-
-			switch (1 << j)
-			  {
-			  case T_V8QI:
-			  case T_V16QI:
-			    eltype = const_intQI_pointer_node;
-			    break;
-
-			  case T_V4HI:
-			  case T_V8HI:
-			    eltype = const_intHI_pointer_node;
-			    break;
-
-			  case T_V2SI:
-			  case T_V4SI:
-			    eltype = const_intSI_pointer_node;
-			    break;
-
-			  case T_V2SF:
-			  case T_V4SF:
-			    eltype = const_float_pointer_node;
-			    break;
-
-			  case T_DI:
-			  case T_V2DI:
-			    eltype = const_intDI_pointer_node;
-			    break;
-
-			  default: gcc_unreachable ();
-			  }
-  		      }
-		    else if (is_store && k == 0)
-		      {
-		        /* Similarly, Neon store patterns use operand 0 as
-			   the memory location to store to (a SImode pointer).
-			   Use a pointer to the element type of the store in
-			   that position.  */
-			gcc_assert (insn_data[icode].operand[k].mode == SImode);
-
-			switch (1 << j)
-			  {
-			  case T_V8QI:
-			  case T_V16QI:
-			    eltype = intQI_pointer_node;
-			    break;
-
-			  case T_V4HI:
-			  case T_V8HI:
-			    eltype = intHI_pointer_node;
-			    break;
-
-			  case T_V2SI:
-			  case T_V4SI:
-			    eltype = intSI_pointer_node;
-			    break;
-
-			  case T_V2SF:
-			  case T_V4SF:
-			    eltype = float_pointer_node;
-			    break;
-
-			  case T_DI:
-			  case T_V2DI:
-			    eltype = intDI_pointer_node;
-			    break;
+      /* Use one of the operands; the target can have a different mode for
+	 mask-generating compares.  */
+      enum machine_mode mode;
+      tree type;
 
-			  default: gcc_unreachable ();
-			  }
-		      }
-		    else
-		      {
-			switch (insn_data[icode].operand[k].mode)
-	        	  {
-			  case VOIDmode: eltype = void_type_node; break;
-			  /* Scalars.  */
-			  case QImode: eltype = neon_intQI_type_node; break;
-			  case HImode: eltype = neon_intHI_type_node; break;
-			  case SImode: eltype = neon_intSI_type_node; break;
-			  case SFmode: eltype = neon_float_type_node; break;
-			  case DImode: eltype = neon_intDI_type_node; break;
-			  case TImode: eltype = intTI_type_node; break;
-			  case EImode: eltype = intEI_type_node; break;
-			  case OImode: eltype = intOI_type_node; break;
-			  case CImode: eltype = intCI_type_node; break;
-			  case XImode: eltype = intXI_type_node; break;
-			  /* 64-bit vectors.  */
-			  case V8QImode: eltype = V8QI_type_node; break;
-			  case V4HImode: eltype = V4HI_type_node; break;
-			  case V2SImode: eltype = V2SI_type_node; break;
-			  case V2SFmode: eltype = V2SF_type_node; break;
-			  /* 128-bit vectors.  */
-			  case V16QImode: eltype = V16QI_type_node; break;
-			  case V8HImode: eltype = V8HI_type_node; break;
-			  case V4SImode: eltype = V4SI_type_node; break;
-			  case V4SFmode: eltype = V4SF_type_node; break;
-			  case V2DImode: eltype = V2DI_type_node; break;
-			  default: gcc_unreachable ();
-			  }
-		      }
+      if (d->name == 0)
+	continue;
 
-		    if (k == 0 && !is_store)
-	              return_type = eltype;
-		    else
-		      args = tree_cons (NULL_TREE, eltype, args);
-		  }
+      mode = insn_data[d->icode].operand[1].mode;
 
-		ftype = build_function_type (return_type, args);
-	      }
-	      break;
+      switch (mode)
+	{
+	case V8QImode:
+	  type = v8qi_ftype_v8qi_v8qi;
+	  break;
+	case V4HImode:
+	  type = v4hi_ftype_v4hi_v4hi;
+	  break;
+	case V2SImode:
+	  type = v2si_ftype_v2si_v2si;
+	  break;
+	case DImode:
+	  type = di_ftype_di_di;
+	  break;
 
-	    case NEON_RESULTPAIR:
-              {
-                switch (insn_data[icode].operand[1].mode)
-                  {
-		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
-                  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
-                  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
-                  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
-                  case DImode: ftype = void_ftype_pdi_di_di; break;
-                  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
-                  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
-                  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
-                  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
-                  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
-                  default: gcc_unreachable ();
-                  }
-              }
-              break;
+	default:
+	  gcc_unreachable ();
+	}
 
-	    case NEON_REINTERP:
-              {
-                /* We iterate over 5 doubleword types, then 5 quadword
-                   types.  */
-                int rhs = j % 5;
-                switch (insn_data[icode].operand[0].mode)
-                  {
-                  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
-                  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
-                  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
-                  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
-                  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
-                  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
-                  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
-                  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
-		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
-                  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
-                  default: gcc_unreachable ();
-                  }
-              }
-              break;
+      def_mbuiltin (d->mask, d->name, type, d->code);
+    }
 
-            default:
-              gcc_unreachable ();
-            }
+  /* Add the remaining MMX insns with somewhat more complicated types.  */
+#define iwmmx_mbuiltin(NAME, TYPE, CODE)			\
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE),	\
+		ARM_BUILTIN_ ## CODE)
+
+  iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
+  iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
+  iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
+
+  iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
+  iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
+  iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
+  iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
+  iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
+  iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
+
+  iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
+  iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
+  iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
+  iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
+  iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
+  iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
+
+  iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
+  iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
+  iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
+  iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
+  iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
+  iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
+
+  iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
+  iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
+  iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
+  iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
+  iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
+  iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
+
+  iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
+
+  iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
+  iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
+  iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
+  iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
+
+  iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
+  iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
+  iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
+  iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
+  iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
+  iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
+  iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
+  iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
+  iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
+
+  iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
+  iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
+  iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
+
+  iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
+  iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
+  iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
+
+  iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
+  iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
+  iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
+  iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
+  iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
+  iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
+
+  iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
+  iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
+  iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
+  iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
+  iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
+  iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
+  iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
+  iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
+  iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
+  iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
+  iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
+  iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
+
+  iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
+  iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
+  iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
+  iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
+
+  iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
+  iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
+  iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
+  iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
+  iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
+  iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
+  iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
 
-          gcc_assert (ftype != NULL);
+#undef iwmmx_mbuiltin
+}
 
-          sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
+static void
+arm_init_tls_builtins (void)
+{
+  tree ftype, decl;
 
-          add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
-				NULL_TREE);
-        }
-    }
+  ftype = build_function_type (ptr_type_node, void_list_node);
+  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
+			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+			       NULL, NULL_TREE);
+  TREE_NOTHROW (decl) = 1;
+  TREE_READONLY (decl) = 1;
+  arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
 }
 
 static void
@@ -19164,6 +20253,17 @@
     arm_init_fp16_builtins ();
 }
 
+/* Return the ARM builtin for CODE.  */
+
+static tree
+arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+  if (code >= ARM_BUILTIN_MAX)
+    return error_mark_node;
+
+  return arm_builtin_decls[code];
+}
+
 /* Implement TARGET_INVALID_PARAMETER_TYPE.  */
 
 static const char *
@@ -19315,55 +20415,68 @@
   return target;
 }
 
-static int
-neon_builtin_compare (const void *a, const void *b)
-{
-  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
-  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
-  unsigned int soughtcode = key->base_fcode;
-
-  if (soughtcode >= memb->base_fcode
-      && soughtcode < memb->base_fcode + memb->num_vars)
-    return 0;
-  else if (soughtcode < memb->base_fcode)
-    return -1;
-  else
-    return 1;
-}
-
-static enum insn_code
-locate_neon_builtin_icode (int fcode, neon_itype *itype)
-{
-  neon_builtin_datum key
-    = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
-  neon_builtin_datum *found;
-  int idx;
-
-  key.base_fcode = fcode;
-  found = (neon_builtin_datum *)
-    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
-		   sizeof (neon_builtin_data[0]), neon_builtin_compare);
-  gcc_assert (found);
-  idx = fcode - (int) found->base_fcode;
-  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
-
-  if (itype)
-    *itype = found->itype;
-
-  return found->codes[idx];
-}
-
 typedef enum {
   NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT,
+  NEON_ARG_MEMORY,
   NEON_ARG_STOP
 } builtin_arg;
 
 #define NEON_MAX_BUILTIN_ARGS 5
 
+/* EXP is a pointer argument to a Neon load or store intrinsic.  Derive
+   and return an expression for the accessed memory.
+
+   The intrinsic function operates on a block of registers that has
+   mode REG_MODE.  This block contains vectors of type TYPE_MODE.
+   The function references the memory at EXP in mode MEM_MODE;
+   this mode may be BLKmode if no more suitable mode is available.  */
+
+static tree
+neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
+			  enum machine_mode reg_mode,
+			  neon_builtin_type_mode type_mode)
+{
+  HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
+  tree elem_type, upper_bound, array_type;
+
+  /* Work out the size of the register block in bytes.  */
+  reg_size = GET_MODE_SIZE (reg_mode);
+
+  /* Work out the size of each vector in bytes.  */
+  gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
+  vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
+
+  /* Work out how many vectors there are.  */
+  gcc_assert (reg_size % vector_size == 0);
+  nvectors = reg_size / vector_size;
+
+  /* Work out how many elements are being loaded or stored.
+     MEM_MODE == REG_MODE implies a one-to-one mapping between register
+     and memory elements; anything else implies a lane load or store.  */
+  if (mem_mode == reg_mode)
+    nelems = vector_size * nvectors;
+  else
+    nelems = nvectors;
+
+  /* Work out the type of each element.  */
+  gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
+  elem_type = TREE_TYPE (TREE_TYPE (exp));
+
+  /* Create a type that describes the full access.  */
+  upper_bound = build_int_cst (size_type_node, nelems - 1);
+  array_type = build_array_type (elem_type, build_index_type (upper_bound));
+  /* Dereference EXP using that type.  */
+  exp = fold_convert (build_pointer_type (array_type), exp);
+
+  return fold_build2 (MEM_REF, array_type, exp,
+		      build_int_cst (TREE_TYPE (exp), 0));
+}
+
 /* Expand a Neon builtin.  */
 static rtx
 arm_expand_neon_args (rtx target, int icode, int have_retval,
+		      neon_builtin_type_mode type_mode,
 		      tree exp, ...)
 {
   va_list ap;
@@ -19372,7 +20485,9 @@
   rtx op[NEON_MAX_BUILTIN_ARGS];
   enum machine_mode tmode = insn_data[icode].operand[0].mode;
   enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
+  enum machine_mode other_mode;
   int argc = 0;
+  int opno;
 
   if (have_retval
       && (!target
@@ -19390,26 +20505,46 @@
         break;
       else
         {
+          opno = argc + have_retval;
+          mode[argc] = insn_data[icode].operand[opno].mode;
           arg[argc] = CALL_EXPR_ARG (exp, argc);
+          if (thisarg == NEON_ARG_MEMORY)
+            {
+              other_mode = insn_data[icode].operand[1 - opno].mode;
+              arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
+                                                    other_mode, type_mode);
+            }
           op[argc] = expand_normal (arg[argc]);
-          mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
 
           switch (thisarg)
             {
             case NEON_ARG_COPY_TO_REG:
               /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
-              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+              if (!(*insn_data[icode].operand[opno].predicate)
                      (op[argc], mode[argc]))
                 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
               break;
 
             case NEON_ARG_CONSTANT:
               /* FIXME: This error message is somewhat unhelpful.  */
-              if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+              if (!(*insn_data[icode].operand[opno].predicate)
                     (op[argc], mode[argc]))
 		error ("argument must be a constant");
               break;
 
+            case NEON_ARG_MEMORY:
+	      gcc_assert (MEM_P (op[argc]));
+	      PUT_MODE (op[argc], mode[argc]);
+	      /* ??? arm_neon.h uses the same built-in functions for signed
+		 and unsigned accesses, casting where necessary.  This isn't
+		 alias safe.  */
+	      set_mem_alias_set (op[argc], 0);
+	      if (!(*insn_data[icode].operand[opno].predicate)
+                    (op[argc], mode[argc]))
+		op[argc] = (replace_equiv_address
+			    (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
+              break;
+
             case NEON_ARG_STOP:
               gcc_unreachable ();
             }
@@ -19487,15 +20622,17 @@
 static rtx
 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
 {
-  neon_itype itype;
-  enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
+  neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
+  neon_itype itype = d->itype;
+  enum insn_code icode = d->code;
+  neon_builtin_type_mode type_mode = d->mode;
 
   switch (itype)
     {
     case NEON_UNOP:
     case NEON_CONVERT:
     case NEON_DUPLANE:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
 
     case NEON_BINOP:
@@ -19505,90 +20642,90 @@
     case NEON_SCALARMULH:
     case NEON_SHIFTINSERT:
     case NEON_LOGICBINOP:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
         NEON_ARG_STOP);
 
     case NEON_TERNOP:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
         NEON_ARG_CONSTANT, NEON_ARG_STOP);
 
     case NEON_GETLANE:
     case NEON_FIXCONV:
     case NEON_SHIFTIMM:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
         NEON_ARG_STOP);
 
     case NEON_CREATE:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
     case NEON_DUP:
     case NEON_SPLIT:
     case NEON_REINTERP:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
     case NEON_COMBINE:
     case NEON_VTBL:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
     case NEON_RESULTPAIR:
-      return arm_expand_neon_args (target, icode, 0, exp,
+      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
         NEON_ARG_STOP);
 
     case NEON_LANEMUL:
     case NEON_LANEMULL:
     case NEON_LANEMULH:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
         NEON_ARG_CONSTANT, NEON_ARG_STOP);
 
     case NEON_LANEMAC:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
         NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
 
     case NEON_SHIFTACC:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
         NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
         NEON_ARG_CONSTANT, NEON_ARG_STOP);
 
     case NEON_SCALARMAC:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
         NEON_ARG_CONSTANT, NEON_ARG_STOP);
 
     case NEON_SELECT:
     case NEON_VTBX:
-      return arm_expand_neon_args (target, icode, 1, exp,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
 	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
         NEON_ARG_STOP);
 
     case NEON_LOAD1:
     case NEON_LOADSTRUCT:
-      return arm_expand_neon_args (target, icode, 1, exp,
-	NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
+	NEON_ARG_MEMORY, NEON_ARG_STOP);
 
     case NEON_LOAD1LANE:
     case NEON_LOADSTRUCTLANE:
-      return arm_expand_neon_args (target, icode, 1, exp,
-	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+      return arm_expand_neon_args (target, icode, 1, type_mode, exp,
+	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
 	NEON_ARG_STOP);
 
     case NEON_STORE1:
     case NEON_STORESTRUCT:
-      return arm_expand_neon_args (target, icode, 0, exp,
-	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
+	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
     case NEON_STORE1LANE:
     case NEON_STORESTRUCTLANE:
-      return arm_expand_neon_args (target, icode, 0, exp,
-	NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+      return arm_expand_neon_args (target, icode, 0, type_mode, exp,
+	NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
 	NEON_ARG_STOP);
     }
 
@@ -21501,6 +22638,8 @@
       const char *fpu_name;
       if (arm_selected_arch)
 	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
+      else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
+	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
       else
 	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
 
@@ -21564,6 +22703,10 @@
 	val = 6;
       asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
 
+      /* Tag_CPU_unaligned_access.  */
+      asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
+		   unaligned_access);
+
       /* Tag_ABI_FP_16bit_format.  */
       if (arm_fp16_format)
 	asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
@@ -22307,7 +23450,21 @@
   return false;
 }
 
-/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+/* Implements target hook array_mode_supported_p.  */
+
+static bool
+arm_array_mode_supported_p (enum machine_mode mode,
+			    unsigned HOST_WIDE_INT nelems)
+{
+  if (TARGET_NEON
+      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
+      && (nelems >= 2 && nelems <= 4))
+    return true;
+
+  return false;
+}
+
+/* Use the option -mvectorize-with-neon-double to override the use of quardword
    registers when autovectorizing for Neon, at least until multiple vector
    widths are supported properly by the middle-end.  */
 
@@ -22318,15 +23475,15 @@
     switch (mode)
       {
       case SFmode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
       case SImode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
       case HImode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
       case QImode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
       case DImode:
-	if (TARGET_NEON_VECTORIZE_QUAD)
+	if (!TARGET_NEON_VECTORIZE_DOUBLE)
 	  return V2DImode;
 	break;
 
@@ -22351,14 +23508,16 @@
 
 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
  
-   We need to define this for LO_REGS on thumb.  Otherwise we can end up
-   using r0-r4 for function arguments, r7 for the stack frame and don't
-   have enough left over to do doubleword arithmetic.  */
-
+   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
+   using r0-r4 for function arguments, r7 for the stack frame and don't have
+   enough left over to do doubleword arithmetic.  For Thumb-2 all the
+   potentially problematic instructions accept high registers so this is not
+   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
+   that require many low registers.  */
 static bool
 arm_class_likely_spilled_p (reg_class_t rclass)
 {
-  if ((TARGET_THUMB && rclass == LO_REGS)
+  if ((TARGET_THUMB1 && rclass == LO_REGS)
       || rclass  == CC_REG)
     return true;
 
@@ -23010,8 +24169,13 @@
 {
   switch (arm_tune)
     {
+    case cortexa15:
+      return 3;
+
     case cortexr4:
     case cortexr4f:
+    case cortexr5:
+    case genericv7a:
     case cortexa5:
     case cortexa8:
     case cortexa9:
@@ -23264,12 +24428,26 @@
 		  rtx target,
 		  rtx memory)
 {
-  const char *suffix = arm_ldrex_suffix (mode);
-  rtx operands[2];
+  rtx operands[3];
 
   operands[0] = target;
-  operands[1] = memory;
-  arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
+  if (mode != DImode)
+    {
+      const char *suffix = arm_ldrex_suffix (mode);
+      operands[1] = memory;
+      arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
+    }
+  else
+    {
+      /* The restrictions on target registers in ARM mode are that the two
+	 registers are consecutive and the first one is even; Thumb is
+	 actually more flexible, but DI should give us this anyway.
+	 Note that the 1st register always gets the lowest word in memory.  */
+      gcc_assert ((REGNO (target) & 1) == 0);
+      operands[1] = gen_rtx_REG (SImode, REGNO (target) + 1);
+      operands[2] = memory;
+      arm_output_asm_insn (emit, 0, operands, "ldrexd\t%%0, %%1, %%C2");
+    }
 }
 
 /* Emit a strex{b,h,d, } instruction appropriate for the specified
@@ -23282,14 +24460,41 @@
 		  rtx value,
 		  rtx memory)
 {
-  const char *suffix = arm_ldrex_suffix (mode);
-  rtx operands[3];
+  rtx operands[4];
 
   operands[0] = result;
   operands[1] = value;
-  operands[2] = memory;
-  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
-		       cc);
+  if (mode != DImode)
+    {
+      const char *suffix = arm_ldrex_suffix (mode);
+      operands[2] = memory;
+      arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2",
+			  suffix, cc);
+    }
+  else
+    {
+      /* The restrictions on target registers in ARM mode are that the two
+	 registers are consecutive and the first one is even; Thumb is
+	 actually more flexible, but DI should give us this anyway.
+	 Note that the 1st register always gets the lowest word in memory.  */
+      gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2);
+      operands[2] = gen_rtx_REG (SImode, REGNO (value) + 1);
+      operands[3] = memory;
+      arm_output_asm_insn (emit, 0, operands, "strexd%s\t%%0, %%1, %%2, %%C3",
+			   cc);
+    }
+}
+
+/* Helper to emit an it instruction in Thumb2 mode only; although the assembler
+   will ignore it in ARM mode, emitting it will mess up instruction counts we
+   sometimes keep 'flags' are the extra t's and e's if it's more than one
+   instruction that is conditional.  */
+static void
+arm_output_it (emit_f emit, const char *flags, const char *cond)
+{
+  rtx operands[1]; /* Don't actually use the operand.  */
+  if (TARGET_THUMB2)
+    arm_output_asm_insn (emit, 0, operands, "it%s\t%s", flags, cond);
 }
 
 /* Helper to emit a two operand instruction.  */
@@ -23331,7 +24536,7 @@
 
    required_value:
 
-   RTX register or const_int representing the required old_value for
+   RTX register representing the required old_value for
    the modify to continue, if NULL no comparsion is performed.  */
 static void
 arm_output_sync_loop (emit_f emit,
@@ -23345,7 +24550,13 @@
 		      enum attr_sync_op sync_op,
 		      int early_barrier_required)
 {
-  rtx operands[1];
+  rtx operands[2];
+  /* We'll use the lo for the normal rtx in the none-DI case
+     as well as the least-sig word in the DI case.  */
+  rtx old_value_lo, required_value_lo, new_value_lo, t1_lo;
+  rtx old_value_hi, required_value_hi, new_value_hi, t1_hi;
+
+  bool is_di = mode == DImode;
 
   gcc_assert (t1 != t2);
 
@@ -23356,82 +24567,142 @@
 
   arm_output_ldrex (emit, mode, old_value, memory);
 
+  if (is_di)
+    {
+      old_value_lo = gen_lowpart (SImode, old_value);
+      old_value_hi = gen_highpart (SImode, old_value);
+      if (required_value)
+	{
+	  required_value_lo = gen_lowpart (SImode, required_value);
+	  required_value_hi = gen_highpart (SImode, required_value);
+	}
+      else
+	{
+	  /* Silence false potentially unused warning.  */
+	  required_value_lo = NULL_RTX;
+	  required_value_hi = NULL_RTX;
+	}
+      new_value_lo = gen_lowpart (SImode, new_value);
+      new_value_hi = gen_highpart (SImode, new_value);
+      t1_lo = gen_lowpart (SImode, t1);
+      t1_hi = gen_highpart (SImode, t1);
+    }
+  else
+    {
+      old_value_lo = old_value;
+      new_value_lo = new_value;
+      required_value_lo = required_value;
+      t1_lo = t1;
+
+      /* Silence false potentially unused warning.  */
+      t1_hi = NULL_RTX;
+      new_value_hi = NULL_RTX;
+      required_value_hi = NULL_RTX;
+      old_value_hi = NULL_RTX;
+    }
+
   if (required_value)
     {
-      rtx operands[2];
+      operands[0] = old_value_lo;
+      operands[1] = required_value_lo;
 
-      operands[0] = old_value;
-      operands[1] = required_value;
       arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
+      if (is_di)
+        {
+          arm_output_it (emit, "", "eq");
+          arm_output_op2 (emit, "cmpeq", old_value_hi, required_value_hi);
+        }
       arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
     }
 
   switch (sync_op)
     {
     case SYNC_OP_ADD:
-      arm_output_op3 (emit, "add", t1, old_value, new_value);
+      arm_output_op3 (emit, is_di ? "adds" : "add",
+		      t1_lo, old_value_lo, new_value_lo);
+      if (is_di)
+	arm_output_op3 (emit, "adc", t1_hi, old_value_hi, new_value_hi);
       break;
 
     case SYNC_OP_SUB:
-      arm_output_op3 (emit, "sub", t1, old_value, new_value);
+      arm_output_op3 (emit, is_di ? "subs" : "sub",
+		      t1_lo, old_value_lo, new_value_lo);
+      if (is_di)
+	arm_output_op3 (emit, "sbc", t1_hi, old_value_hi, new_value_hi);
       break;
 
     case SYNC_OP_IOR:
-      arm_output_op3 (emit, "orr", t1, old_value, new_value);
+      arm_output_op3 (emit, "orr", t1_lo, old_value_lo, new_value_lo);
+      if (is_di)
+	arm_output_op3 (emit, "orr", t1_hi, old_value_hi, new_value_hi);
       break;
 
     case SYNC_OP_XOR:
-      arm_output_op3 (emit, "eor", t1, old_value, new_value);
+      arm_output_op3 (emit, "eor", t1_lo, old_value_lo, new_value_lo);
+      if (is_di)
+	arm_output_op3 (emit, "eor", t1_hi, old_value_hi, new_value_hi);
       break;
 
     case SYNC_OP_AND:
-      arm_output_op3 (emit,"and", t1, old_value, new_value);
+      arm_output_op3 (emit,"and", t1_lo, old_value_lo, new_value_lo);
+      if (is_di)
+	arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
       break;
 
     case SYNC_OP_NAND:
-      arm_output_op3 (emit, "and", t1, old_value, new_value);
-      arm_output_op2 (emit, "mvn", t1, t1);
+      arm_output_op3 (emit, "and", t1_lo, old_value_lo, new_value_lo);
+      if (is_di)
+	arm_output_op3 (emit, "and", t1_hi, old_value_hi, new_value_hi);
+      arm_output_op2 (emit, "mvn", t1_lo, t1_lo);
+      if (is_di)
+	arm_output_op2 (emit, "mvn", t1_hi, t1_hi);
       break;
 
     case SYNC_OP_NONE:
       t1 = new_value;
+      t1_lo = new_value_lo;
+      if (is_di)
+	t1_hi = new_value_hi;
       break;
     }
 
+  /* Note that the result of strex is a 0/1 flag that's always 1 register.  */
   if (t2)
     {
-       arm_output_strex (emit, mode, "", t2, t1, memory);
-       operands[0] = t2;
-       arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
-       arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
-			    LOCAL_LABEL_PREFIX);
+      arm_output_strex (emit, mode, "", t2, t1, memory);
+      operands[0] = t2;
+      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+			   LOCAL_LABEL_PREFIX);
     }
   else
     {
       /* Use old_value for the return value because for some operations
 	 the old_value can easily be restored.  This saves one register.  */
-      arm_output_strex (emit, mode, "", old_value, t1, memory);
-      operands[0] = old_value;
+      arm_output_strex (emit, mode, "", old_value_lo, t1, memory);
+      operands[0] = old_value_lo;
       arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
       arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
 			   LOCAL_LABEL_PREFIX);
 
+      /* Note that we only used the _lo half of old_value as a temporary
+	 so in DI we don't have to restore the _hi part.  */
       switch (sync_op)
 	{
 	case SYNC_OP_ADD:
-	  arm_output_op3 (emit, "sub", old_value, t1, new_value);
+	  arm_output_op3 (emit, "sub", old_value_lo, t1_lo, new_value_lo);
 	  break;
 
 	case SYNC_OP_SUB:
-	  arm_output_op3 (emit, "add", old_value, t1, new_value);
+	  arm_output_op3 (emit, "add", old_value_lo, t1_lo, new_value_lo);
 	  break;
 
 	case SYNC_OP_XOR:
-	  arm_output_op3 (emit, "eor", old_value, t1, new_value);
+	  arm_output_op3 (emit, "eor", old_value_lo, t1_lo, new_value_lo);
 	  break;
 
 	case SYNC_OP_NONE:
-	  arm_output_op2 (emit, "mov", old_value, required_value);
+	  arm_output_op2 (emit, "mov", old_value_lo, required_value_lo);
 	  break;
 
 	default:
@@ -23537,7 +24808,7 @@
     target = gen_reg_rtx (mode);
 
   memory = arm_legitimize_sync_memory (memory);
-  if (mode != SImode)
+  if (mode != SImode && mode != DImode)
     {
       rtx load_temp = gen_reg_rtx (SImode);
 
@@ -23556,6 +24827,12 @@
     }
 }
 
+static unsigned int
+arm_autovectorize_vector_sizes (void)
+{
+  return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
+}
+
 static bool
 arm_vector_alignment_reachable (const_tree type, bool is_packed)
 {
@@ -23709,4 +24986,53 @@
     return NO_REGS;
 }
 
+/* Compute the atrribute "length" of insn "*push_multi".
+   So this function MUST be kept in sync with that insn pattern.  */
+int
+arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
+{
+  int i, regno, hi_reg;
+  int num_saves = XVECLEN (parallel_op, 0);
+
+  /* ARM mode.  */
+  if (TARGET_ARM)
+    return 4;
+
+  /* Thumb2 mode.  */
+  regno = REGNO (first_op);
+  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
+  for (i = 1; i < num_saves && !hi_reg; i++)
+    {
+      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
+      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
+    }
+
+  if (!hi_reg)
+    return 2;
+  return 4;
+}
+
+int
+vfp3_const_double_for_fract_bits (rtx operand)
+{
+  REAL_VALUE_TYPE r0;
+  
+  if (GET_CODE (operand) != CONST_DOUBLE)
+    return 0;
+  
+  REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
+  if (exact_real_inverse (DFmode, &r0))
+    {
+      if (exact_real_truncate (DFmode, &r0))
+	{
+	  HOST_WIDE_INT value = real_to_integer (&r0);
+	  value = value & 0xffffffff;
+	  if ((value != 0) && ( (value & (value - 1)) == 0))
+	    return int_log2 (value);
+	}
+    }
+  return 0;
+}
+
 #include "gt-arm.h"
+
--- a/src/gcc/config/arm/arm-cores.def
+++ b/src/gcc/config/arm/arm-cores.def
@@ -70,10 +70,10 @@
 /* V4 Architecture Processors */
 ARM_CORE("arm8",          arm8,		4,	             FL_MODE26 | FL_LDSCHED, fastmul)
 ARM_CORE("arm810",        arm810,	4,	             FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("strongarm",     strongarm,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm110",  strongarm110,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm1100", strongarm1100, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm1110", strongarm1110, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("strongarm",     strongarm,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm110",  strongarm110,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm1100", strongarm1100, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm1110", strongarm1110, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
 ARM_CORE("fa526",         fa526,        4,                               FL_LDSCHED, fastmul)
 ARM_CORE("fa626",         fa626,        4,                               FL_LDSCHED, fastmul)
 
@@ -122,15 +122,19 @@
 ARM_CORE("arm1176jzf-s",  arm1176jzfs,	6ZK,				 FL_LDSCHED | FL_VFPV2, 9e)
 ARM_CORE("mpcorenovfp",	  mpcorenovfp,	6K,				 FL_LDSCHED, 9e)
 ARM_CORE("mpcore",	  mpcore,	6K,				 FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("arm1156t2-s",	  arm1156t2s,	6T2,				 FL_LDSCHED, 9e)
-ARM_CORE("arm1156t2f-s",  arm1156t2fs,  6T2,				 FL_LDSCHED | FL_VFPV2, 9e)
-ARM_CORE("cortex-a5",	  cortexa5,	7A,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-a8",	  cortexa8,	7A,				 FL_LDSCHED, 9e)
+ARM_CORE("arm1156t2-s",	  arm1156t2s,	6T2,				 FL_LDSCHED, v6t2)
+ARM_CORE("arm1156t2f-s",  arm1156t2fs,  6T2,				 FL_LDSCHED | FL_VFPV2, v6t2)
+ARM_CORE("generic-armv7-a", genericv7a,	7A,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-a5",	  cortexa5,	7A,				 FL_LDSCHED, cortex_a5)
+ARM_CORE("cortex-a7",	  cortexa7,	7A,				 FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-a8",	  cortexa8,	7A,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-a9",	  cortexa9,	7A,				 FL_LDSCHED, cortex_a9)
-ARM_CORE("cortex-a15",	  cortexa15,	7A,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-r4",	  cortexr4,	7R,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-r4f",	  cortexr4f,	7R,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, 9e)
-ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, 9e)
+ARM_CORE("cortex-a15",	  cortexa15,	7A,				 FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-r4",	  cortexr4,	7R,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-r4f",	  cortexr4f,	7R,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-r5",	  cortexr5,	7R,				 FL_LDSCHED | FL_ARM_DIV, cortex)
+ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, cortex)
+
--- a/src/gcc/config/arm/arm.h
+++ b/src/gcc/config/arm/arm.h
@@ -47,6 +47,8 @@
     {							\
 	if (TARGET_DSP_MULTIPLY)			\
 	   builtin_define ("__ARM_FEATURE_DSP");	\
+	if (unaligned_access)				\
+	  builtin_define ("__ARM_FEATURE_UNALIGNED");	\
 	/* Define __arm__ even when in thumb mode, for	\
 	   consistency with armcc.  */			\
 	builtin_define ("__arm__");			\
@@ -103,6 +105,8 @@
 	      builtin_define ("__ARM_PCS");		\
 	    builtin_define ("__ARM_EABI__");		\
 	  }						\
+	if (TARGET_IDIV)				\
+	  builtin_define ("__ARM_ARCH_EXT_IDIV__");	\
     } while (0)
 
 /* The various ARM cores.  */
@@ -196,6 +200,7 @@
    Do not define this macro if it does not need to do anything.  */
 #define EXTRA_SPECS						\
   { "subtarget_cpp_spec",	SUBTARGET_CPP_SPEC },           \
+  { "asm_cpu_spec",		ASM_CPU_SPEC },			\
   SUBTARGET_EXTRA_SPECS
 
 #ifndef SUBTARGET_EXTRA_SPECS
@@ -284,7 +289,8 @@
   (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em))
 
 /* Should MOVW/MOVT be used in preference to a constant pool.  */
-#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size)
+#define TARGET_USE_MOVT \
+  (arm_arch_thumb2 && !optimize_size && !current_tune->prefer_constant_pool)
 
 /* We could use unified syntax for arm mode, but for now we just use it
    for Thumb-2.  */
@@ -303,8 +309,16 @@
 /* Nonzero if this chip supports ldrex and strex */
 #define TARGET_HAVE_LDREX	((arm_arch6 && TARGET_ARM) || arm_arch7)
 
-/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}.  */
-#define TARGET_HAVE_LDREXBHD	((arm_arch6k && TARGET_ARM) || arm_arch7)
+/* Nonzero if this chip supports ldrex{bh} and strex{bh}.  */
+#define TARGET_HAVE_LDREXBH	((arm_arch6k && TARGET_ARM) || arm_arch7)
+
+/* Nonzero if this chip supports ldrexd and strexd.  */
+#define TARGET_HAVE_LDREXD	(((arm_arch6k && TARGET_ARM) || arm_arch7) \
+				 && arm_arch_notm)
+
+/* Nonzero if integer division instructions supported.  */
+#define TARGET_IDIV		((TARGET_ARM && arm_arch_arm_hwdiv) \
+				 || (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
 
 /* True iff the full BPABI is being used.  If TARGET_BPABI is true,
    then TARGET_AAPCS_BASED must be true -- but the converse does not
@@ -490,8 +504,11 @@
 /* Nonzero if chip supports Thumb 2.  */
 extern int arm_arch_thumb2;
 
-/* Nonzero if chip supports integer division instruction.  */
-extern int arm_arch_hwdiv;
+/* Nonzero if chip supports integer division instruction in ARM mode.  */
+extern int arm_arch_arm_hwdiv;
+
+/* Nonzero if chip supports integer division instruction in Thumb mode.  */
+extern int arm_arch_thumb_hwdiv;
 
 #ifndef TARGET_DEFAULT
 #define TARGET_DEFAULT  (MASK_APCS_FRAME)
@@ -1172,12 +1189,12 @@
 }
 
 /* FPA registers can't do subreg as all values are reformatted to internal
-   precision.  VFP registers may only be accessed in the mode they
-   were set.  */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)	\
-  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)		\
-   ? reg_classes_intersect_p (FPA_REGS, (CLASS))	\
-     || reg_classes_intersect_p (VFP_REGS, (CLASS))	\
+   precision.  In VFPv1, VFP registers could only be accessed in the mode
+   they were set, so subregs would be invalid there too.  However, we don't
+   support VFPv1 at the moment, and the restriction was lifted in VFPv2.  */
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		\
+  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			\
+   ? reg_classes_intersect_p (FPA_REGS, (CLASS))		\
    : 0)
 
 /* The class value for index registers, and the one for base regs.  */
@@ -1188,7 +1205,7 @@
    when addressing quantities in QI or HI mode; if we don't know the
    mode, then we must be conservative.  */
 #define MODE_BASE_REG_CLASS(MODE)					\
-    (TARGET_32BIT ? CORE_REGS :					\
+    (TARGET_ARM || (TARGET_THUMB2 && !optimize_size) ? CORE_REGS :      \
      (((MODE) == SImode) ? BASE_REGS : LO_REGS))
 
 /* For Thumb we can not support SP+reg addressing, so we return LO_REGS
@@ -1778,27 +1795,6 @@
 #define TARGET_DEFAULT_WORD_RELOCATIONS 0
 #endif
 
-/* Nonzero if the constant value X is a legitimate general operand.
-   It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.
-
-   On the ARM, allow any integer (invalid ones are removed later by insn
-   patterns), nice doubles and symbol_refs which refer to the function's
-   constant pool XXX.
-
-   When generating pic allow anything.  */
-#define ARM_LEGITIMATE_CONSTANT_P(X)	(flag_pic || ! label_mentioned_p (X))
-
-#define THUMB_LEGITIMATE_CONSTANT_P(X)	\
- (   GET_CODE (X) == CONST_INT		\
-  || GET_CODE (X) == CONST_DOUBLE	\
-  || CONSTANT_ADDRESS_P (X)		\
-  || flag_pic)
-
-#define LEGITIMATE_CONSTANT_P(X)			\
-  (!arm_cannot_force_const_mem (X)			\
-   && (TARGET_32BIT ? ARM_LEGITIMATE_CONSTANT_P (X)	\
-		    : THUMB_LEGITIMATE_CONSTANT_P (X)))
-
 #ifndef SUBTARGET_NAME_ENCODING_LENGTHS
 #define SUBTARGET_NAME_ENCODING_LENGTHS
 #endif
@@ -1973,7 +1969,7 @@
       : min >= -4096 && max < 4096					\
       ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode)	\
       : SImode)								\
-   : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode		\
+   : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode		\
       : (max >= 0x200) ? HImode						\
       : QImode))
 
@@ -2042,7 +2038,8 @@
 /* Try to generate sequences that don't involve branches, we can then use
    conditional instructions */
 #define BRANCH_COST(speed_p, predictable_p) \
-  (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+  (current_tune->branch_cost (speed_p, predictable_p))
+
 
 /* Position Independent Code.  */
 /* We decide which register to use based on the compilation options and
@@ -2280,178 +2277,6 @@
    : arm_gen_return_addr_mask ())
 
 
-/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
-   symbolic names defined here (which would require too much duplication).
-   FIXME?  */
-enum arm_builtins
-{
-  ARM_BUILTIN_GETWCX,
-  ARM_BUILTIN_SETWCX,
-
-  ARM_BUILTIN_WZERO,
-
-  ARM_BUILTIN_WAVG2BR,
-  ARM_BUILTIN_WAVG2HR,
-  ARM_BUILTIN_WAVG2B,
-  ARM_BUILTIN_WAVG2H,
-
-  ARM_BUILTIN_WACCB,
-  ARM_BUILTIN_WACCH,
-  ARM_BUILTIN_WACCW,
-
-  ARM_BUILTIN_WMACS,
-  ARM_BUILTIN_WMACSZ,
-  ARM_BUILTIN_WMACU,
-  ARM_BUILTIN_WMACUZ,
-
-  ARM_BUILTIN_WSADB,
-  ARM_BUILTIN_WSADBZ,
-  ARM_BUILTIN_WSADH,
-  ARM_BUILTIN_WSADHZ,
-
-  ARM_BUILTIN_WALIGN,
-
-  ARM_BUILTIN_TMIA,
-  ARM_BUILTIN_TMIAPH,
-  ARM_BUILTIN_TMIABB,
-  ARM_BUILTIN_TMIABT,
-  ARM_BUILTIN_TMIATB,
-  ARM_BUILTIN_TMIATT,
-
-  ARM_BUILTIN_TMOVMSKB,
-  ARM_BUILTIN_TMOVMSKH,
-  ARM_BUILTIN_TMOVMSKW,
-
-  ARM_BUILTIN_TBCSTB,
-  ARM_BUILTIN_TBCSTH,
-  ARM_BUILTIN_TBCSTW,
-
-  ARM_BUILTIN_WMADDS,
-  ARM_BUILTIN_WMADDU,
-
-  ARM_BUILTIN_WPACKHSS,
-  ARM_BUILTIN_WPACKWSS,
-  ARM_BUILTIN_WPACKDSS,
-  ARM_BUILTIN_WPACKHUS,
-  ARM_BUILTIN_WPACKWUS,
-  ARM_BUILTIN_WPACKDUS,
-
-  ARM_BUILTIN_WADDB,
-  ARM_BUILTIN_WADDH,
-  ARM_BUILTIN_WADDW,
-  ARM_BUILTIN_WADDSSB,
-  ARM_BUILTIN_WADDSSH,
-  ARM_BUILTIN_WADDSSW,
-  ARM_BUILTIN_WADDUSB,
-  ARM_BUILTIN_WADDUSH,
-  ARM_BUILTIN_WADDUSW,
-  ARM_BUILTIN_WSUBB,
-  ARM_BUILTIN_WSUBH,
-  ARM_BUILTIN_WSUBW,
-  ARM_BUILTIN_WSUBSSB,
-  ARM_BUILTIN_WSUBSSH,
-  ARM_BUILTIN_WSUBSSW,
-  ARM_BUILTIN_WSUBUSB,
-  ARM_BUILTIN_WSUBUSH,
-  ARM_BUILTIN_WSUBUSW,
-
-  ARM_BUILTIN_WAND,
-  ARM_BUILTIN_WANDN,
-  ARM_BUILTIN_WOR,
-  ARM_BUILTIN_WXOR,
-
-  ARM_BUILTIN_WCMPEQB,
-  ARM_BUILTIN_WCMPEQH,
-  ARM_BUILTIN_WCMPEQW,
-  ARM_BUILTIN_WCMPGTUB,
-  ARM_BUILTIN_WCMPGTUH,
-  ARM_BUILTIN_WCMPGTUW,
-  ARM_BUILTIN_WCMPGTSB,
-  ARM_BUILTIN_WCMPGTSH,
-  ARM_BUILTIN_WCMPGTSW,
-
-  ARM_BUILTIN_TEXTRMSB,
-  ARM_BUILTIN_TEXTRMSH,
-  ARM_BUILTIN_TEXTRMSW,
-  ARM_BUILTIN_TEXTRMUB,
-  ARM_BUILTIN_TEXTRMUH,
-  ARM_BUILTIN_TEXTRMUW,
-  ARM_BUILTIN_TINSRB,
-  ARM_BUILTIN_TINSRH,
-  ARM_BUILTIN_TINSRW,
-
-  ARM_BUILTIN_WMAXSW,
-  ARM_BUILTIN_WMAXSH,
-  ARM_BUILTIN_WMAXSB,
-  ARM_BUILTIN_WMAXUW,
-  ARM_BUILTIN_WMAXUH,
-  ARM_BUILTIN_WMAXUB,
-  ARM_BUILTIN_WMINSW,
-  ARM_BUILTIN_WMINSH,
-  ARM_BUILTIN_WMINSB,
-  ARM_BUILTIN_WMINUW,
-  ARM_BUILTIN_WMINUH,
-  ARM_BUILTIN_WMINUB,
-
-  ARM_BUILTIN_WMULUM,
-  ARM_BUILTIN_WMULSM,
-  ARM_BUILTIN_WMULUL,
-
-  ARM_BUILTIN_PSADBH,
-  ARM_BUILTIN_WSHUFH,
-
-  ARM_BUILTIN_WSLLH,
-  ARM_BUILTIN_WSLLW,
-  ARM_BUILTIN_WSLLD,
-  ARM_BUILTIN_WSRAH,
-  ARM_BUILTIN_WSRAW,
-  ARM_BUILTIN_WSRAD,
-  ARM_BUILTIN_WSRLH,
-  ARM_BUILTIN_WSRLW,
-  ARM_BUILTIN_WSRLD,
-  ARM_BUILTIN_WRORH,
-  ARM_BUILTIN_WRORW,
-  ARM_BUILTIN_WRORD,
-  ARM_BUILTIN_WSLLHI,
-  ARM_BUILTIN_WSLLWI,
-  ARM_BUILTIN_WSLLDI,
-  ARM_BUILTIN_WSRAHI,
-  ARM_BUILTIN_WSRAWI,
-  ARM_BUILTIN_WSRADI,
-  ARM_BUILTIN_WSRLHI,
-  ARM_BUILTIN_WSRLWI,
-  ARM_BUILTIN_WSRLDI,
-  ARM_BUILTIN_WRORHI,
-  ARM_BUILTIN_WRORWI,
-  ARM_BUILTIN_WRORDI,
-
-  ARM_BUILTIN_WUNPCKIHB,
-  ARM_BUILTIN_WUNPCKIHH,
-  ARM_BUILTIN_WUNPCKIHW,
-  ARM_BUILTIN_WUNPCKILB,
-  ARM_BUILTIN_WUNPCKILH,
-  ARM_BUILTIN_WUNPCKILW,
-
-  ARM_BUILTIN_WUNPCKEHSB,
-  ARM_BUILTIN_WUNPCKEHSH,
-  ARM_BUILTIN_WUNPCKEHSW,
-  ARM_BUILTIN_WUNPCKEHUB,
-  ARM_BUILTIN_WUNPCKEHUH,
-  ARM_BUILTIN_WUNPCKEHUW,
-  ARM_BUILTIN_WUNPCKELSB,
-  ARM_BUILTIN_WUNPCKELSH,
-  ARM_BUILTIN_WUNPCKELSW,
-  ARM_BUILTIN_WUNPCKELUB,
-  ARM_BUILTIN_WUNPCKELUH,
-  ARM_BUILTIN_WUNPCKELUW,
-
-  ARM_BUILTIN_THREAD_POINTER,
-
-  ARM_BUILTIN_NEON_BASE,
-
-  ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE  /* FIXME: Wrong!  */
-};
-
 /* Do not emit .note.GNU-stack by default.  */
 #ifndef NEED_INDICATE_EXEC_STACK
 #define NEED_INDICATE_EXEC_STACK	0
@@ -2461,4 +2286,25 @@
    instruction.  */
 #define MAX_LDM_STM_OPS 4
 
+#define ASM_CPU_SPEC \
+   " %{mcpu=generic-*:-march=%*;"				\
+   "   :%{mcpu=*:-mcpu=%*} %{march=*:-march=%*}}"
+
+/* -mcpu=native handling only makes sense with compiler running on
+   an ARM chip.  */
+#if defined(__arm__)
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+# define EXTRA_SPEC_FUNCTIONS						\
+  { "local_cpu_detect", host_detect_local_cpu },
+
+# define MCPU_MTUNE_NATIVE_SPECS					\
+   " %{march=native:%<march=native %:local_cpu_detect(arch)}"		\
+   " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}"		\
+   " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#else
+# define MCPU_MTUNE_NATIVE_SPECS ""
+#endif
+
+#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
+
 #endif /* ! GCC_ARM_H */
--- a/src/gcc/config/arm/arm.md
+++ b/src/gcc/config/arm/arm.md
@@ -48,6 +48,15 @@
    (DOM_CC_X_OR_Y   2)
   ]
 )
+;; conditional compare combination
+(define_constants
+  [(CMP_CMP 0)
+   (CMN_CMP 1)
+   (CMP_CMN 2)
+   (CMN_CMN 3)
+   (NUM_OF_COND_CMP 4)
+  ]
+)
 
 ;; UNSPEC Usage:
 ;; Note: sin and cos are no-longer used.
@@ -105,6 +114,10 @@
                              ; another symbolic address.
    (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
    (UNSPEC_PIC_UNIFIED 29)  ; Create a common pic addressing form.
+   (UNSPEC_UNALIGNED_LOAD 30)  ; Used to represent ldr/ldrh instructions that access
+                               ; unaligned locations, on architectures which support
+                               ; that.
+   (UNSPEC_UNALIGNED_STORE 31) ; Same for str/strh.
   ]
 )
 
@@ -144,6 +157,7 @@
    (VUNSPEC_SYNC_OP               23)	; Represent a sync_<op>
    (VUNSPEC_SYNC_NEW_OP           24)	; Represent a sync_new_<op>
    (VUNSPEC_SYNC_OLD_OP           25)	; Represent a sync_old_<op>
+   (VUNSPEC_SYNC_RELEASE 	  26)	; Represent a sync_lock_release.
   ]
 )
 
@@ -272,7 +286,7 @@
 ;; scheduling information.
 
 (define_attr "insn"
-        "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
+        "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other"
         (const_string "other"))
 
 ; TYPE attribute is used to detect floating point instructions which, if
@@ -333,6 +347,13 @@
 	 (const_string "mult")
 	 (const_string "alu")))
 
+; Is this an (integer side) multiply with a 64-bit result?
+(define_attr "mul64" "no,yes"
+	     (if_then_else
+	       (eq_attr "insn" "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
+	       (const_string "yes")
+	       (const_string "no")))
+
 ; Load scheduling, set from the arm_ld_sched variable
 ; initialized by arm_option_override()
 (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
@@ -491,7 +512,7 @@
 
 (define_attr "tune_cortexr4" "yes,no"
   (const (if_then_else
-	  (eq_attr "tune" "cortexr4,cortexr4f")
+	  (eq_attr "tune" "cortexr4,cortexr4f,cortexr5")
 	  (const_string "yes")
 	  (const_string "no"))))
 
@@ -499,7 +520,7 @@
 
 (define_attr "generic_sched" "yes,no"
   (const (if_then_else
-          (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
+          (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
 	       (eq_attr "tune_cortexr4" "yes"))
           (const_string "no")
           (const_string "yes"))))
@@ -525,6 +546,7 @@
 (include "cortex-a5.md")
 (include "cortex-a8.md")
 (include "cortex-a9.md")
+(include "cortex-a15.md")
 (include "cortex-r4.md")
 (include "cortex-r4f.md")
 (include "cortex-m4.md")
@@ -702,21 +724,24 @@
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k,r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")
-		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
+	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
+		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
   "TARGET_32BIT"
   "@
    add%?\\t%0, %1, %2
    add%?\\t%0, %1, %2
    add%?\\t%0, %2, %1
+   addw%?\\t%0, %1, %2
+   addw%?\\t%0, %1, %2
    sub%?\\t%0, %1, #%n2
    sub%?\\t%0, %1, #%n2
+   subw%?\\t%0, %1, #%n2
+   subw%?\\t%0, %1, #%n2
    #"
   "TARGET_32BIT
    && GET_CODE (operands[2]) == CONST_INT
-   && !(const_ok_for_arm (INTVAL (operands[2]))
-        || const_ok_for_arm (-INTVAL (operands[2])))
+   && !const_ok_for_op (INTVAL (operands[2]), PLUS)
    && (reload_completed || !arm_eliminable_register (operands[1]))"
   [(clobber (const_int 0))]
   "
@@ -725,8 +750,9 @@
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,4,4,16")
-   (set_attr "predicable" "yes")]
+  [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
 )
 
 (define_insn_and_split "*thumb1_addsi3"
@@ -792,7 +818,7 @@
   ""
 )
 
-(define_insn "*addsi3_compare0"
+(define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
 	 (plus:SI (match_operand:SI 1 "s_register_operand" "r, r")
@@ -1807,7 +1833,37 @@
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*maddhidi4"
+;; Note: there is no maddhisi4ibt because this one is canonical form
+(define_insn "*maddhisi4tb"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(plus:SI (mult:SI (ashiftrt:SI
+			   (match_operand:SI 1 "s_register_operand" "r")
+			   (const_int 16))
+			  (sign_extend:SI
+			   (match_operand:HI 2 "s_register_operand" "r")))
+		 (match_operand:SI 3 "s_register_operand" "r")))]
+  "TARGET_DSP_MULTIPLY"
+  "smlatb%?\\t%0, %1, %2, %3"
+  [(set_attr "insn" "smlaxy")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*maddhisi4tt"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(plus:SI (mult:SI (ashiftrt:SI
+			   (match_operand:SI 1 "s_register_operand" "r")
+			   (const_int 16))
+			  (ashiftrt:SI
+			   (match_operand:SI 2 "s_register_operand" "r")
+			   (const_int 16)))
+		 (match_operand:SI 3 "s_register_operand" "r")))]
+  "TARGET_DSP_MULTIPLY"
+  "smlatt%?\\t%0, %1, %2, %3"
+  [(set_attr "insn" "smlaxy")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
 	  (mult:DI (sign_extend:DI
@@ -1820,6 +1876,39 @@
   [(set_attr "insn" "smlalxy")
    (set_attr "predicable" "yes")])
 
+;; Note: there is no maddhidi4ibt because this one is canonical form
+(define_insn "*maddhidi4tb"
+  [(set (match_operand:DI 0 "s_register_operand" "=r")
+	(plus:DI
+	  (mult:DI (sign_extend:DI
+		    (ashiftrt:SI
+		     (match_operand:SI 1 "s_register_operand" "r")
+		     (const_int 16)))
+		   (sign_extend:DI
+		    (match_operand:HI 2 "s_register_operand" "r")))
+	  (match_operand:DI 3 "s_register_operand" "0")))]
+  "TARGET_DSP_MULTIPLY"
+  "smlaltb%?\\t%Q0, %R0, %1, %2"
+  [(set_attr "insn" "smlalxy")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*maddhidi4tt"
+  [(set (match_operand:DI 0 "s_register_operand" "=r")
+	(plus:DI
+	  (mult:DI (sign_extend:DI
+		    (ashiftrt:SI
+		     (match_operand:SI 1 "s_register_operand" "r")
+		     (const_int 16)))
+		   (sign_extend:DI
+		    (ashiftrt:SI
+		     (match_operand:SI 2 "s_register_operand" "r")
+		     (const_int 16))))
+	  (match_operand:DI 3 "s_register_operand" "0")))]
+  "TARGET_DSP_MULTIPLY"
+  "smlaltt%?\\t%Q0, %R0, %1, %2"
+  [(set_attr "insn" "smlalxy")
+   (set_attr "predicable" "yes")])
+
 (define_expand "mulsf3"
   [(set (match_operand:SF          0 "s_register_operand" "")
 	(mult:SF (match_operand:SF 1 "s_register_operand" "")
@@ -2385,10 +2474,10 @@
 ;;; this insv pattern, so this pattern needs to be reevalutated.
 
 (define_expand "insv"
-  [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
-                         (match_operand:SI 1 "general_operand" "")
-                         (match_operand:SI 2 "general_operand" ""))
-        (match_operand:SI 3 "reg_or_int_operand" ""))]
+  [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+                      (match_operand 1 "general_operand" "")
+                      (match_operand 2 "general_operand" ""))
+        (match_operand 3 "reg_or_int_operand" ""))]
   "TARGET_ARM || arm_arch_thumb2"
   "
   {
@@ -2399,35 +2488,70 @@
 
     if (arm_arch_thumb2)
       {
-	bool use_bfi = TRUE;
-
-	if (GET_CODE (operands[3]) == CONST_INT)
+        if (unaligned_access && MEM_P (operands[0])
+	    && s_register_operand (operands[3], GET_MODE (operands[3]))
+	    && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
 	  {
-	    HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+	    rtx base_addr;
+
+	    if (BYTES_BIG_ENDIAN)
+	      start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width
+			  - start_bit;
 
-	    if (val == 0)
+	    if (width == 32)
 	      {
-		emit_insn (gen_insv_zero (operands[0], operands[1],
-					  operands[2]));
-		DONE;
+	        base_addr = adjust_address (operands[0], SImode,
+					    start_bit / BITS_PER_UNIT);
+		emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
 	      }
+	    else
+	      {
+	        rtx tmp = gen_reg_rtx (HImode);
 
-	    /* See if the set can be done with a single orr instruction.  */
-	    if (val == mask && const_ok_for_arm (val << start_bit))
-	      use_bfi = FALSE;
+	        base_addr = adjust_address (operands[0], HImode,
+					    start_bit / BITS_PER_UNIT);
+		emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
+		emit_insn (gen_unaligned_storehi (base_addr, tmp));
+	      }
+	    DONE;
 	  }
-	  
-	if (use_bfi)
+	else if (s_register_operand (operands[0], GET_MODE (operands[0])))
 	  {
-	    if (GET_CODE (operands[3]) != REG)
-	      operands[3] = force_reg (SImode, operands[3]);
+	    bool use_bfi = TRUE;
 
-	    emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
-				    operands[3]));
-	    DONE;
+	    if (GET_CODE (operands[3]) == CONST_INT)
+	      {
+		HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+
+		if (val == 0)
+		  {
+		    emit_insn (gen_insv_zero (operands[0], operands[1],
+					      operands[2]));
+		    DONE;
+		  }
+
+		/* See if the set can be done with a single orr instruction.  */
+		if (val == mask && const_ok_for_arm (val << start_bit))
+		  use_bfi = FALSE;
+	      }
+
+	    if (use_bfi)
+	      {
+		if (GET_CODE (operands[3]) != REG)
+		  operands[3] = force_reg (SImode, operands[3]);
+
+		emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+					operands[3]));
+		DONE;
+	      }
 	  }
+	else
+	  FAIL;
       }
 
+    if (!s_register_operand (operands[0], GET_MODE (operands[0])))
+      FAIL;
+
     target = copy_rtx (operands[0]);
     /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical 
        subreg as the final target.  */
@@ -3300,6 +3424,60 @@
 		      (const_int 12)))]
 )
 
+(define_code_iterator SAT [smin smax])
+(define_code_iterator SATrev [smin smax])
+(define_code_attr SATlo [(smin "1") (smax "2")])
+(define_code_attr SAThi [(smin "2") (smax "1")])
+
+(define_insn "*satsi_<SAT:code>"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+        (SAT:SI (SATrev:SI (match_operand:SI 3 "s_register_operand" "r")
+                           (match_operand:SI 1 "const_int_operand" "i"))
+                (match_operand:SI 2 "const_int_operand" "i")))]
+  "TARGET_32BIT && arm_arch6 && <SAT:CODE> != <SATrev:CODE>
+   && arm_sat_operator_match (operands[<SAT:SATlo>], operands[<SAT:SAThi>], NULL, NULL)"
+{
+  int mask;
+  bool signed_sat;
+  if (!arm_sat_operator_match (operands[<SAT:SATlo>], operands[<SAT:SAThi>],
+                               &mask, &signed_sat))
+    gcc_unreachable ();
+
+  operands[1] = GEN_INT (mask);
+  if (signed_sat)
+    return "ssat%?\t%0, %1, %3";
+  else
+    return "usat%?\t%0, %1, %3";
+}
+  [(set_attr "predicable" "yes")
+   (set_attr "insn" "sat")])
+
+(define_insn "*satsi_<SAT:code>_shift"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+        (SAT:SI (SATrev:SI (match_operator:SI 3 "sat_shift_operator"
+                             [(match_operand:SI 4 "s_register_operand" "r")
+                              (match_operand:SI 5 "const_int_operand" "i")])
+                           (match_operand:SI 1 "const_int_operand" "i"))
+                (match_operand:SI 2 "const_int_operand" "i")))]
+  "TARGET_32BIT && arm_arch6 && <SAT:CODE> != <SATrev:CODE>
+   && arm_sat_operator_match (operands[<SAT:SATlo>], operands[<SAT:SAThi>], NULL, NULL)"
+{
+  int mask;
+  bool signed_sat;
+  if (!arm_sat_operator_match (operands[<SAT:SATlo>], operands[<SAT:SAThi>],
+                               &mask, &signed_sat))
+    gcc_unreachable ();
+
+  operands[1] = GEN_INT (mask);
+  if (signed_sat)
+    return "ssat%?\t%0, %1, %4%S3";
+  else
+    return "usat%?\t%0, %1, %4%S3";
+}
+  [(set_attr "predicable" "yes")
+   (set_attr "insn" "sat")
+   (set_attr "shift" "3")
+   (set_attr "type" "alu_shift")])
 
 ;; Shift and rotation insns
 
@@ -3619,12 +3797,10 @@
 ;; to reduce register pressure later on.
 
 (define_expand "extzv"
-  [(set (match_dup 4)
-	(ashift:SI (match_operand:SI   1 "register_operand" "")
-		   (match_operand:SI   2 "const_int_operand" "")))
-   (set (match_operand:SI              0 "register_operand" "")
-	(lshiftrt:SI (match_dup 4)
-		     (match_operand:SI 3 "const_int_operand" "")))]
+  [(set (match_operand 0 "s_register_operand" "")
+	(zero_extract (match_operand 1 "nonimmediate_operand" "")
+		      (match_operand 2 "const_int_operand" "")
+		      (match_operand 3 "const_int_operand" "")))]
   "TARGET_THUMB1 || arm_arch_thumb2"
   "
   {
@@ -3633,10 +3809,57 @@
     
     if (arm_arch_thumb2)
       {
-	emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
-				 operands[3]));
-	DONE;
+	HOST_WIDE_INT width = INTVAL (operands[2]);
+	HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+	if (unaligned_access && MEM_P (operands[1])
+	    && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
+	  {
+	    rtx base_addr;
+
+	    if (BYTES_BIG_ENDIAN)
+	      bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width
+		       - bitpos;
+
+	    if (width == 32)
+              {
+		base_addr = adjust_address (operands[1], SImode,
+					    bitpos / BITS_PER_UNIT);
+		emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+              }
+	    else
+              {
+		rtx dest = operands[0];
+		rtx tmp = gen_reg_rtx (SImode);
+
+		/* We may get a paradoxical subreg here.  Strip it off.  */
+		if (GET_CODE (dest) == SUBREG
+		    && GET_MODE (dest) == SImode
+		    && GET_MODE (SUBREG_REG (dest)) == HImode)
+		  dest = SUBREG_REG (dest);
+
+		if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+		  FAIL;
+
+		base_addr = adjust_address (operands[1], HImode,
+					    bitpos / BITS_PER_UNIT);
+		emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
+		emit_move_insn (gen_lowpart (SImode, dest), tmp);
+	      }
+	    DONE;
+	  }
+	else if (s_register_operand (operands[1], GET_MODE (operands[1])))
+	  {
+	    emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
+				     operands[3]));
+	    DONE;
+	  }
+	else
+	  FAIL;
       }
+    
+    if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+      FAIL;
 
     operands[3] = GEN_INT (rshift);
     
@@ -3646,12 +3869,154 @@
         DONE;
       }
       
-    operands[2] = GEN_INT (lshift);
-    operands[4] = gen_reg_rtx (SImode);
+    emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift),
+			     operands[3], gen_reg_rtx (SImode)));
+    DONE;
   }"
 )
 
-(define_insn "extv"
+;; Helper for extzv, for the Thumb-1 register-shifts case.
+
+(define_expand "extzv_t1"
+  [(set (match_operand:SI 4 "s_register_operand" "")
+	(ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		   (match_operand:SI 2 "const_int_operand" "")))
+   (set (match_operand:SI 0 "s_register_operand" "")
+	(lshiftrt:SI (match_dup 4)
+		     (match_operand:SI 3 "const_int_operand" "")))]
+  "TARGET_THUMB1"
+  "")
+
+(define_expand "extv"
+  [(set (match_operand 0 "s_register_operand" "")
+	(sign_extract (match_operand 1 "nonimmediate_operand" "")
+		      (match_operand 2 "const_int_operand" "")
+		      (match_operand 3 "const_int_operand" "")))]
+  "arm_arch_thumb2"
+{
+  HOST_WIDE_INT width = INTVAL (operands[2]);
+  HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+  if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
+      && (bitpos % BITS_PER_UNIT)  == 0)
+    {
+      rtx base_addr;
+      
+      if (BYTES_BIG_ENDIAN)
+	bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos;
+      
+      if (width == 32)
+        {
+	  base_addr = adjust_address (operands[1], SImode,
+				      bitpos / BITS_PER_UNIT);
+	  emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+        }
+      else
+        {
+	  rtx dest = operands[0];
+	  rtx tmp = gen_reg_rtx (SImode);
+	  
+	  /* We may get a paradoxical subreg here.  Strip it off.  */
+	  if (GET_CODE (dest) == SUBREG
+	      && GET_MODE (dest) == SImode
+	      && GET_MODE (SUBREG_REG (dest)) == HImode)
+	    dest = SUBREG_REG (dest);
+	  
+	  if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+	    FAIL;
+	  
+	  base_addr = adjust_address (operands[1], HImode,
+				      bitpos / BITS_PER_UNIT);
+	  emit_insn (gen_unaligned_loadhis (tmp, base_addr));
+	  emit_move_insn (gen_lowpart (SImode, dest), tmp);
+	}
+
+      DONE;
+    }
+  else if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+    FAIL;
+  else if (GET_MODE (operands[0]) == SImode
+	   && GET_MODE (operands[1]) == SImode)
+    {
+      emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2],
+				 operands[3]));
+      DONE;
+    }
+
+  FAIL;
+})
+
+; Helper to expand register forms of extv with the proper modes.
+
+(define_expand "extv_regsi"
+  [(set (match_operand:SI 0 "s_register_operand" "")
+	(sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
+			 (match_operand 2 "const_int_operand" "")
+			 (match_operand 3 "const_int_operand" "")))]
+  ""
+{
+})
+
+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
+
+(define_insn "unaligned_loadsi"
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
+		   UNSPEC_UNALIGNED_LOAD))]
+  "unaligned_access && TARGET_32BIT"
+  "ldr%?\t%0, %1\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load1")])
+
+(define_insn "unaligned_loadhis"
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(sign_extend:SI
+	  (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+		     UNSPEC_UNALIGNED_LOAD)))]
+  "unaligned_access && TARGET_32BIT"
+  "ldr%(sh%)\t%0, %1\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_loadhiu"
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(zero_extend:SI
+	  (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+		     UNSPEC_UNALIGNED_LOAD)))]
+  "unaligned_access && TARGET_32BIT"
+  "ldr%(h%)\t%0, %1\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_storesi"
+  [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
+	(unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
+		   UNSPEC_UNALIGNED_STORE))]
+  "unaligned_access && TARGET_32BIT"
+  "str%?\t%1, %0\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "store1")])
+
+(define_insn "unaligned_storehi"
+  [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
+	(unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
+		   UNSPEC_UNALIGNED_STORE))]
+  "unaligned_access && TARGET_32BIT"
+  "str%(h%)\t%1, %0\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "store1")])
+
+(define_insn "*extv_reg"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
                          (match_operand:SI 2 "const_int_operand" "M")
@@ -3673,6 +4038,28 @@
    (set_attr "predicable" "yes")]
 )
 
+
+;; Division instructions
+(define_insn "divsi3"
+  [(set (match_operand:SI	  0 "s_register_operand" "=r")
+	(div:SI (match_operand:SI 1 "s_register_operand"  "r")
+		(match_operand:SI 2 "s_register_operand"  "r")))]
+  "TARGET_IDIV"
+  "sdiv%?\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "insn" "sdiv")]
+)
+
+(define_insn "udivsi3"
+  [(set (match_operand:SI	   0 "s_register_operand" "=r")
+	(udiv:SI (match_operand:SI 1 "s_register_operand"  "r")
+		 (match_operand:SI 2 "s_register_operand"  "r")))]
+  "TARGET_IDIV"
+  "udiv%?\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "insn" "udiv")]
+)
+
 
 ;; Unary arithmetic insns
 
@@ -4045,8 +4432,8 @@
 
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
-        (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
-					    "<qhs_extenddi_cstr>")))]
+        (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
+					    "<qhs_zextenddi_cstr>")))]
   "TARGET_32BIT <qhs_zextenddi_cond>"
   "#"
   [(set_attr "length" "8")
@@ -5963,8 +6350,8 @@
 
 
 (define_insn "*arm_movqi_insn"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
-	(match_operand:QI 1 "general_operand" "rI,K,m,r"))]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,l,Uu,r,m")
+	(match_operand:QI 1 "general_operand" "rI,K,Uu,l,m,r"))]
   "TARGET_32BIT
    && (   register_operand (operands[0], QImode)
        || register_operand (operands[1], QImode))"
@@ -5972,10 +6359,14 @@
    mov%?\\t%0, %1
    mvn%?\\t%0, #%B1
    ldr%(b%)\\t%0, %1
+   str%(b%)\\t%1, %0
+   ldr%(b%)\\t%0, %1
    str%(b%)\\t%1, %0"
-  [(set_attr "type" "*,*,load1,store1")
-   (set_attr "insn" "mov,mvn,*,*")
-   (set_attr "predicable" "yes")]
+  [(set_attr "type" "*,*,load1,store1,load1,store1")
+   (set_attr "insn" "mov,mvn,*,*,*,*")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "any,any,t2,t2,any,any")
+   (set_attr "length" "4,4,2,2,4,4")]
 )
 
 (define_insn "*thumb1_movqi_insn"
@@ -6205,7 +6596,7 @@
   [(match_operand:DF 0 "arm_reload_memory_operand" "=o")
    (match_operand:DF 1 "s_register_operand" "r")
    (match_operand:SI 2 "s_register_operand" "=&r")]
-  "TARGET_32BIT"
+  "TARGET_THUMB2"
   "
   {
     enum rtx_code code = GET_CODE (XEXP (operands[0], 0));
@@ -6468,7 +6859,7 @@
 
 (define_expand "cbranchsi4"
   [(set (pc) (if_then_else
-	      (match_operator 0 "arm_comparison_operator"
+	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:SI 1 "s_register_operand" "")
 	        (match_operand:SI 2 "nonmemory_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
@@ -6519,7 +6910,7 @@
 
 (define_expand "cbranchsf4"
   [(set (pc) (if_then_else
-	      (match_operator 0 "arm_comparison_operator"
+	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:SF 1 "s_register_operand" "")
 	        (match_operand:SF 2 "arm_float_compare_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
@@ -6531,7 +6922,7 @@
 
 (define_expand "cbranchdf4"
   [(set (pc) (if_then_else
-	      (match_operator 0 "arm_comparison_operator"
+	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:DF 1 "s_register_operand" "")
 	        (match_operand:DF 2 "arm_float_compare_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
@@ -6543,7 +6934,7 @@
 
 (define_expand "cbranchdi4"
   [(set (pc) (if_then_else
-	      (match_operator 0 "arm_comparison_operator"
+	      (match_operator 0 "expandable_comparison_operator"
 	       [(match_operand:DI 1 "cmpdi_operand" "")
 	        (match_operand:DI 2 "cmpdi_operand" "")])
 	      (label_ref (match_operand 3 "" ""))
@@ -7132,13 +7523,17 @@
 
 (define_insn "*arm_cmpsi_insn"
   [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_operand:SI 0 "s_register_operand" "r,r")
-		    (match_operand:SI 1 "arm_add_operand"    "rI,L")))]
+	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r")
+		    (match_operand:SI 1 "arm_add_operand"    "Py,r,rI,L")))]
   "TARGET_32BIT"
   "@
    cmp%?\\t%0, %1
+   cmp%?\\t%0, %1
+   cmp%?\\t%0, %1
    cmn%?\\t%0, #%n1"
-  [(set_attr "conds" "set")]
+  [(set_attr "conds" "set")
+   (set_attr "arch" "t2,t2,any,any")
+   (set_attr "length" "2,2,4,4")]
 )
 
 (define_insn "*cmpsi_shiftsi"
@@ -7201,8 +7596,8 @@
   [(set (reg:CC_CZ CC_REGNUM)
 	(compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
 		       (match_operand:DI 1 "arm_di_operand"	"rDi")))]
-  "TARGET_ARM"
-  "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1"
+  "TARGET_32BIT"
+  "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
   [(set_attr "conds" "set")
    (set_attr "length" "8")]
 )
@@ -7309,7 +7704,14 @@
   return \"b%d1\\t%l0\";
   "
   [(set_attr "conds" "use")
-   (set_attr "type" "branch")]
+   (set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else
+	   (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+		(and (ge (minus (match_dup 0) (pc)) (const_int -250))
+		     (le (minus (match_dup 0) (pc)) (const_int 256))))
+	   (const_int 2)
+	   (const_int 4)))]
 )
 
 (define_insn "*arm_cond_branch_reversed"
@@ -7328,7 +7730,14 @@
   return \"b%D1\\t%l0\";
   "
   [(set_attr "conds" "use")
-   (set_attr "type" "branch")]
+   (set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else
+	   (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+		(and (ge (minus (match_dup 0) (pc)) (const_int -250))
+		     (le (minus (match_dup 0) (pc)) (const_int 256))))
+	   (const_int 2)
+	   (const_int 4)))]
 )
 
 
@@ -7380,7 +7789,7 @@
 
 (define_expand "cstoresi4"
   [(set (match_operand:SI 0 "s_register_operand" "")
-	(match_operator:SI 1 "arm_comparison_operator"
+	(match_operator:SI 1 "expandable_comparison_operator"
 	 [(match_operand:SI 2 "s_register_operand" "")
 	  (match_operand:SI 3 "reg_or_int_operand" "")]))]
   "TARGET_32BIT || TARGET_THUMB1"
@@ -7516,7 +7925,7 @@
 
 (define_expand "cstoresf4"
   [(set (match_operand:SI 0 "s_register_operand" "")
-	(match_operator:SI 1 "arm_comparison_operator"
+	(match_operator:SI 1 "expandable_comparison_operator"
 	 [(match_operand:SF 2 "s_register_operand" "")
 	  (match_operand:SF 3 "arm_float_compare_operand" "")]))]
   "TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -7526,7 +7935,7 @@
 
 (define_expand "cstoredf4"
   [(set (match_operand:SI 0 "s_register_operand" "")
-	(match_operator:SI 1 "arm_comparison_operator"
+	(match_operator:SI 1 "expandable_comparison_operator"
 	 [(match_operand:DF 2 "s_register_operand" "")
 	  (match_operand:DF 3 "arm_float_compare_operand" "")]))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
@@ -7536,7 +7945,7 @@
 
 (define_expand "cstoredi4"
   [(set (match_operand:SI 0 "s_register_operand" "")
-	(match_operator:SI 1 "arm_comparison_operator"
+	(match_operator:SI 1 "expandable_comparison_operator"
 	 [(match_operand:DI 2 "cmpdi_operand" "")
 	  (match_operand:DI 3 "cmpdi_operand" "")]))]
   "TARGET_32BIT"
@@ -7656,7 +8065,7 @@
 
 (define_expand "movsicc"
   [(set (match_operand:SI 0 "s_register_operand" "")
-	(if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
+	(if_then_else:SI (match_operand 1 "expandable_comparison_operator" "")
 			 (match_operand:SI 2 "arm_not_operand" "")
 			 (match_operand:SI 3 "arm_not_operand" "")))]
   "TARGET_32BIT"
@@ -7676,7 +8085,7 @@
 
 (define_expand "movsfcc"
   [(set (match_operand:SF 0 "s_register_operand" "")
-	(if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
+	(if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
 			 (match_operand:SF 2 "s_register_operand" "")
 			 (match_operand:SF 3 "nonmemory_operand" "")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -7702,7 +8111,7 @@
 
 (define_expand "movdfcc"
   [(set (match_operand:DF 0 "s_register_operand" "")
-	(if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
+	(if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
 			 (match_operand:DF 2 "s_register_operand" "")
 			 (match_operand:DF 3 "arm_float_add_operand" "")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
@@ -7780,7 +8189,14 @@
     return \"b%?\\t%l0\";
   }
   "
-  [(set_attr "predicable" "yes")]
+  [(set_attr "predicable" "yes")
+   (set (attr "length")
+	(if_then_else
+	   (and (ne (symbol_ref "TARGET_THUMB2") (const_int 0))
+		(and (ge (minus (match_dup 0) (pc)) (const_int -2044))
+		     (le (minus (match_dup 0) (pc)) (const_int 2048))))
+	   (const_int 2)
+	   (const_int 4)))]
 )
 
 (define_insn "*thumb_jump"
@@ -8865,40 +9281,85 @@
    (set_attr "length" "8,12")]
 )
 
-;; ??? Is it worth using these conditional patterns in Thumb-2 mode?
 (define_insn "*cmp_ite0"
   [(set (match_operand 6 "dominant_cc_register" "")
 	(compare
 	 (if_then_else:SI
 	  (match_operator 4 "arm_comparison_operator"
-	   [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+	   [(match_operand:SI 0 "s_register_operand"
+	        "l,l,l,r,r,r,r,r,r")
+	    (match_operand:SI 1 "arm_add_operand"
+	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
-	   [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+	   [(match_operand:SI 2 "s_register_operand"
+	        "l,r,r,l,l,r,r,r,r")
+	    (match_operand:SI 3 "arm_add_operand"
+	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
 	  (const_int 0))
 	 (const_int 0)))]
-  "TARGET_ARM"
+  "TARGET_32BIT"
   "*
   {
-    static const char * const opcodes[4][2] =
+    static const char * const cmp1[NUM_OF_COND_CMP][2] =
+    {
+      {\"cmp%d5\\t%0, %1\",
+       \"cmp%d4\\t%2, %3\"},
+      {\"cmn%d5\\t%0, #%n1\",
+       \"cmp%d4\\t%2, %3\"},
+      {\"cmp%d5\\t%0, %1\",
+       \"cmn%d4\\t%2, #%n3\"},
+      {\"cmn%d5\\t%0, #%n1\",
+       \"cmn%d4\\t%2, #%n3\"}
+    };
+    static const char * const cmp2[NUM_OF_COND_CMP][2] =
+    {
+      {\"cmp\\t%2, %3\",
+       \"cmp\\t%0, %1\"},
+      {\"cmp\\t%2, %3\",
+       \"cmn\\t%0, #%n1\"},
+      {\"cmn\\t%2, #%n3\",
+       \"cmp\\t%0, %1\"},
+      {\"cmn\\t%2, #%n3\",
+       \"cmn\\t%0, #%n1\"}
+    };
+    static const char * const ite[2] =
     {
-      {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
-       \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
-      {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
-       \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
-      {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
-       \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
-      {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
-       \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
+      \"it\\t%d5\",
+      \"it\\t%d4\"
     };
+    static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+                                   CMP_CMP, CMN_CMP, CMP_CMP,
+                                   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
 
-    return opcodes[which_alternative][swap];
+    output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+    if (TARGET_THUMB2) {
+      output_asm_insn (ite[swap], operands);
+    }
+    output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+    return \"\";
   }"
   [(set_attr "conds" "set")
-   (set_attr "length" "8")]
+   (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+   (set_attr_alternative "length"
+      [(const_int 6)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))])]
 )
 
 (define_insn "*cmp_ite1"
@@ -8906,35 +9367,81 @@
 	(compare
 	 (if_then_else:SI
 	  (match_operator 4 "arm_comparison_operator"
-	   [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+	   [(match_operand:SI 0 "s_register_operand"
+	        "l,l,l,r,r,r,r,r,r")
+	    (match_operand:SI 1 "arm_add_operand"
+	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
-	   [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+	   [(match_operand:SI 2 "s_register_operand"
+	        "l,r,r,l,l,r,r,r,r")
+	    (match_operand:SI 3 "arm_add_operand"
+	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")])
 	  (const_int 1))
 	 (const_int 0)))]
-  "TARGET_ARM"
+  "TARGET_32BIT"
   "*
   {
-    static const char * const opcodes[4][2] =
+    static const char * const cmp1[NUM_OF_COND_CMP][2] =
+    {
+      {\"cmp\\t%0, %1\",
+       \"cmp\\t%2, %3\"},
+      {\"cmn\\t%0, #%n1\",
+       \"cmp\\t%2, %3\"},
+      {\"cmp\\t%0, %1\",
+       \"cmn\\t%2, #%n3\"},
+      {\"cmn\\t%0, #%n1\",
+       \"cmn\\t%2, #%n3\"}
+    };
+    static const char * const cmp2[NUM_OF_COND_CMP][2] =
     {
-      {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\",
-       \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
-      {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\",
-       \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
-      {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\",
-       \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
-      {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\",
-       \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
+      {\"cmp%d4\\t%2, %3\",
+       \"cmp%D5\\t%0, %1\"},
+      {\"cmp%d4\\t%2, %3\",
+       \"cmn%D5\\t%0, #%n1\"},
+      {\"cmn%d4\\t%2, #%n3\",
+       \"cmp%D5\\t%0, %1\"},
+      {\"cmn%d4\\t%2, #%n3\",
+       \"cmn%D5\\t%0, #%n1\"}
     };
+    static const char * const ite[2] =
+    {
+      \"it\\t%d4\",
+      \"it\\t%D5\"
+    };
+    static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+                                   CMP_CMP, CMN_CMP, CMP_CMP,
+                                   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]),
 			      reverse_condition (GET_CODE (operands[4])));
 
-    return opcodes[which_alternative][swap];
+    output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+    if (TARGET_THUMB2) {
+      output_asm_insn (ite[swap], operands);
+    }
+    output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+    return \"\";
   }"
   [(set_attr "conds" "set")
-   (set_attr "length" "8")]
+   (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+   (set_attr_alternative "length"
+      [(const_int 6)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))])]
 )
 
 (define_insn "*cmp_and"
@@ -8942,34 +9449,80 @@
 	(compare
 	 (and:SI
 	  (match_operator 4 "arm_comparison_operator"
-	   [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+	   [(match_operand:SI 0 "s_register_operand" 
+	        "l,l,l,r,r,r,r,r,r")
+	    (match_operand:SI 1 "arm_add_operand" 
+	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
-	   [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+	   [(match_operand:SI 2 "s_register_operand" 
+	        "l,r,r,l,l,r,r,r,r")
+	    (match_operand:SI 3 "arm_add_operand" 
+	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
 	 (const_int 0)))]
-  "TARGET_ARM"
+  "TARGET_32BIT"
   "*
   {
-    static const char *const opcodes[4][2] =
+    static const char *const cmp1[NUM_OF_COND_CMP][2] =
     {
-      {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
-       \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
-      {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
-       \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
-      {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
-       \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
-      {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
-       \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
+      {\"cmp%d5\\t%0, %1\",
+       \"cmp%d4\\t%2, %3\"},
+      {\"cmn%d5\\t%0, #%n1\",
+       \"cmp%d4\\t%2, %3\"},
+      {\"cmp%d5\\t%0, %1\",
+       \"cmn%d4\\t%2, #%n3\"},
+      {\"cmn%d5\\t%0, #%n1\",
+       \"cmn%d4\\t%2, #%n3\"}
     };
+    static const char *const cmp2[NUM_OF_COND_CMP][2] =
+    {
+      {\"cmp\\t%2, %3\",
+       \"cmp\\t%0, %1\"},
+      {\"cmp\\t%2, %3\",
+       \"cmn\\t%0, #%n1\"},
+      {\"cmn\\t%2, #%n3\",
+       \"cmp\\t%0, %1\"},
+      {\"cmn\\t%2, #%n3\",
+       \"cmn\\t%0, #%n1\"}
+    };
+    static const char *const ite[2] =
+    {
+      \"it\\t%d5\",
+      \"it\\t%d4\"
+    };
+    static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+                                   CMP_CMP, CMN_CMP, CMP_CMP,
+                                   CMN_CMP, CMP_CMN, CMN_CMN};
     int swap =
       comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
 
-    return opcodes[which_alternative][swap];
+    output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+    if (TARGET_THUMB2) {
+      output_asm_insn (ite[swap], operands);
+    }
+    output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+    return \"\";
   }"
   [(set_attr "conds" "set")
    (set_attr "predicable" "no")
-   (set_attr "length" "8")]
+   (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+   (set_attr_alternative "length"
+      [(const_int 6)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))])]
 )
 
 (define_insn "*cmp_ior"
@@ -8977,34 +9530,80 @@
 	(compare
 	 (ior:SI
 	  (match_operator 4 "arm_comparison_operator"
-	   [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+	   [(match_operand:SI 0 "s_register_operand"
+	        "l,l,l,r,r,r,r,r,r")
+	    (match_operand:SI 1 "arm_add_operand"
+	        "lPy,lPy,lPy,rI,L,rI,L,rI,L")])
 	  (match_operator:SI 5 "arm_comparison_operator"
-	   [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
-	    (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+	   [(match_operand:SI 2 "s_register_operand"
+	        "l,r,r,l,l,r,r,r,r")
+	    (match_operand:SI 3 "arm_add_operand"
+	        "lPy,rI,L,lPy,lPy,rI,rI,L,L")]))
 	 (const_int 0)))]
-  "TARGET_ARM"
+  "TARGET_32BIT"
   "*
-{
-  static const char *const opcodes[4][2] =
   {
-    {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\",
-     \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
-    {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\",
-     \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
-    {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\",
-     \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
-    {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\",
-     \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
-  };
-  int swap =
-    comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
+    static const char *const cmp1[NUM_OF_COND_CMP][2] =
+    {
+      {\"cmp\\t%0, %1\",
+       \"cmp\\t%2, %3\"},
+      {\"cmn\\t%0, #%n1\",
+       \"cmp\\t%2, %3\"},
+      {\"cmp\\t%0, %1\",
+       \"cmn\\t%2, #%n3\"},
+      {\"cmn\\t%0, #%n1\",
+       \"cmn\\t%2, #%n3\"}
+    };
+    static const char *const cmp2[NUM_OF_COND_CMP][2] =
+    {
+      {\"cmp%D4\\t%2, %3\",
+       \"cmp%D5\\t%0, %1\"},
+      {\"cmp%D4\\t%2, %3\",
+       \"cmn%D5\\t%0, #%n1\"},
+      {\"cmn%D4\\t%2, #%n3\",
+       \"cmp%D5\\t%0, %1\"},
+      {\"cmn%D4\\t%2, #%n3\",
+       \"cmn%D5\\t%0, #%n1\"}
+    };
+    static const char *const ite[2] =
+    {
+      \"it\\t%D4\",
+      \"it\\t%D5\"
+    };
+    static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN,
+                                   CMP_CMP, CMN_CMP, CMP_CMP,
+                                   CMN_CMP, CMP_CMN, CMN_CMN};
+    int swap =
+      comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
 
-  return opcodes[which_alternative][swap];
-}
-"
+    output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands);
+    if (TARGET_THUMB2) {
+      output_asm_insn (ite[swap], operands);
+    }
+    output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands);
+    return \"\";
+  }
+  "
   [(set_attr "conds" "set")
-   (set_attr "length" "8")]
+   (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any")
+   (set_attr_alternative "length"
+      [(const_int 6)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (const_int 8)
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))
+       (if_then_else (eq_attr "is_thumb" "no")
+           (const_int 8)
+           (const_int 10))])]
 )
 
 (define_insn_and_split "*ior_scc_scc"
@@ -9016,11 +9615,11 @@
 		 [(match_operand:SI 4 "s_register_operand" "r")
 		  (match_operand:SI 5 "arm_add_operand" "rIL")])))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM
+  "TARGET_32BIT
    && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y)
        != CCmode)"
   "#"
-  "TARGET_ARM && reload_completed"
+  "TARGET_32BIT && reload_completed"
   [(set (match_dup 7)
 	(compare
 	 (ior:SI
@@ -9049,9 +9648,9 @@
    (set (match_operand:SI 7 "s_register_operand" "=r")
 	(ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
 		(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
-  "TARGET_ARM"
+  "TARGET_32BIT"
   "#"
-  "TARGET_ARM && reload_completed"
+  "TARGET_32BIT && reload_completed"
   [(set (match_dup 0)
 	(compare
 	 (ior:SI
@@ -9072,11 +9671,11 @@
 		 [(match_operand:SI 4 "s_register_operand" "r")
 		  (match_operand:SI 5 "arm_add_operand" "rIL")])))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM
+  "TARGET_32BIT
    && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
        != CCmode)"
   "#"
-  "TARGET_ARM && reload_completed
+  "TARGET_32BIT && reload_completed
    && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
        != CCmode)"
   [(set (match_dup 7)
@@ -9107,9 +9706,9 @@
    (set (match_operand:SI 7 "s_register_operand" "=r")
 	(and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
 		(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
-  "TARGET_ARM"
+  "TARGET_32BIT"
   "#"
-  "TARGET_ARM && reload_completed"
+  "TARGET_32BIT && reload_completed"
   [(set (match_dup 0)
 	(compare
 	 (and:SI
@@ -9134,11 +9733,11 @@
 		 [(match_operand:SI 4 "s_register_operand" "r,r,r")
 		  (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")])))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM
+  "TARGET_32BIT
    && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
        == CCmode)"
   "#"
-  "TARGET_ARM && reload_completed"
+  "TARGET_32BIT && reload_completed"
   [(parallel [(set (match_dup 0)
 		   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
 	      (clobber (reg:CC CC_REGNUM))])
@@ -10248,6 +10847,8 @@
 ;; Push multiple registers to the stack.  Registers are in parallel (use ...)
 ;; expressions.  For simplicity, the first register is also in the unspec
 ;; part.
+;; To avoid the usage of GNU extension, the length attribute is computed
+;; in a C function arm_attr_length_push_multi.
 (define_insn "*push_multi"
   [(match_parallel 2 "multi_register_push"
     [(set (match_operand:BLK 0 "memory_operand" "=m")
@@ -10287,7 +10888,9 @@
 
     return \"\";
   }"
-  [(set_attr "type" "store4")]
+  [(set_attr "type" "store4")
+   (set (attr "length")
+	(symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))]
 )
 
 (define_insn "stack_tie"
--- a/src/gcc/config/arm/arm.opt
+++ b/src/gcc/config/arm/arm.opt
@@ -48,6 +48,11 @@
 Target RejectNegative Joined
 Specify the name of the target architecture
 
+; Other arm_arch values are loaded from arm-tables.opt
+; but that is a generated file and this is an odd-one-out.
+EnumValue
+Enum(arm_arch) String(native) Value(-1) DriverOnly
+
 marm
 Target RejectNegative InverseMask(THUMB) Undocumented
 
@@ -153,14 +158,23 @@
 Target RejectNegative Joined
 Tune code for the given processor
 
+; Other processor_type values are loaded from arm-tables.opt
+; but that is a generated file and this is an odd-one-out.
+EnumValue
+Enum(processor_type) String(native) Value(-1) DriverOnly
+
 mwords-little-endian
 Target Report RejectNegative Mask(LITTLE_WORDS)
 Assume big endian bytes, little endian words
 
 mvectorize-with-neon-quad
-Target Report Mask(NEON_VECTORIZE_QUAD)
+Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE)
 Use Neon quad-word (rather than double-word) registers for vectorization
 
+mvectorize-with-neon-double
+Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE)
+Use Neon double-word (rather than quad-word) registers for vectorization
+
 mword-relocations
 Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
 Only generate absolute relocations on word sized values.
@@ -169,3 +183,7 @@
 Target Report Var(fix_cm3_ldrd) Init(2)
 Avoid overlapping destination and address registers on LDRD instructions
 that may trigger Cortex-M3 errata.
+
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned word and halfword accesses to packed data.
--- a/src/gcc/config/arm/arm-protos.h
+++ b/src/gcc/config/arm/arm-protos.h
@@ -46,6 +46,7 @@
 extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
+extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
 			       HOST_WIDE_INT, rtx, rtx, int);
 extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
@@ -58,14 +59,19 @@
 					   int);
 extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
 					    int);
+extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
 					   int *);
+extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *,
+					   int *, bool);
 extern char *neon_output_logic_immediate (const char *, rtx *,
 					  enum machine_mode, int, int);
+extern char *neon_output_shift_immediate (const char *, char, rtx *,
+					  enum machine_mode, int, bool);
 extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
 				  rtx (*) (rtx, rtx, rtx));
 extern rtx neon_make_constant (rtx);
@@ -81,7 +87,6 @@
 extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx,
 						     bool);
 extern bool arm_tls_referenced_p (rtx);
-extern bool arm_cannot_force_const_mem (rtx);
 
 extern int cirrus_memory_offset (rtx);
 extern int arm_coproc_mem_operand (rtx, bool);
@@ -99,6 +104,7 @@
 extern int symbol_mentioned_p (rtx);
 extern int label_mentioned_p (rtx);
 extern RTX_CODE minmax_code (rtx);
+extern bool arm_sat_operator_match (rtx, rtx, int *, bool *);
 extern int adjacent_mem_locations (rtx, rtx);
 extern bool gen_ldm_seq (rtx *, int, bool);
 extern bool gen_stm_seq (rtx *, int);
@@ -152,6 +158,7 @@
 extern const char *arm_output_memory_barrier (rtx *);
 extern const char *arm_output_sync_insn (rtx, rtx *);
 extern unsigned int arm_sync_loop_insns (rtx , rtx *);
+extern int arm_attr_length_push_multi(rtx, rtx);
 
 #if defined TREE_CODE
 extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
@@ -175,6 +182,7 @@
 #endif
 extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
 #ifdef RTX_CODE
+extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
 extern void thumb1_final_prescan_insn (rtx);
 extern void thumb2_final_prescan_insn (rtx);
 extern const char *thumb_load_double_from_address (rtx *);
@@ -220,12 +228,18 @@
   bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
   bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
   int constant_limit;
+  /* Maximum number of instructions to conditionalise in
+     arm_final_prescan_insn.  */
+  int max_insns_skipped;
   int num_prefetch_slots;
   int l1_cache_size;
   int l1_cache_line_size;
+  bool prefer_constant_pool;
+  int (*branch_cost) (bool, bool);
 };
 
 extern const struct tune_params *current_tune;
+extern int vfp3_const_double_for_fract_bits (rtx);
 #endif /* RTX_CODE */
 
 #endif /* ! GCC_ARM_PROTOS_H */
--- a/src/gcc/config/arm/arm-tune.md
+++ b/src/gcc/config/arm/arm-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from arm-cores.def
 (define_attr "tune"
-	"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
+	"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0"
 	(const (symbol_ref "((enum attr_tune) arm_tune)")))
--- a/src/gcc/config/arm/bpabi.h
+++ b/src/gcc/config/arm/bpabi.h
@@ -56,7 +56,9 @@
   "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
 
 #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
-  "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
+  "|mcpu=cortex-a7"\
+  "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15|mcpu=generic-armv7-a"\
+  ":%{!r:--be8}}}"
 
 /* Tell the assembler to build BPABI binaries.  */
 #undef  SUBTARGET_EXTRA_ASM_SPEC
--- a/src/gcc/config/arm/constraints.md
+++ b/src/gcc/config/arm/constraints.md
@@ -29,13 +29,14 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O
 
 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
 ;; in ARM state: Uq
+;; in Thumb state: Uu, Uw
 
 
 (define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
@@ -74,6 +75,18 @@
 	   (and (match_code "const_int")
                 (match_test "(ival & 0xffff0000) == 0")))))
 
+(define_constraint "Pj"
+ "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
+ (and (match_code "const_int")
+      (and (match_test "TARGET_THUMB2")
+	   (match_test "(ival & 0xfffff000) == 0"))))
+
+(define_constraint "PJ"
+ "@internal A constant that satisfies the Pj constrant if negated."
+ (and (match_code "const_int")
+      (and (match_test "TARGET_THUMB2")
+	   (match_test "((-ival) & 0xfffff000) == 0"))))
+
 (define_register_constraint "k" "STACK_REG"
  "@internal The stack register.")
 
@@ -189,6 +202,11 @@
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
 
+(define_constraint "Py"
+  "@internal In Thumb-2 state a constant in the range 0 to 255"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")
@@ -273,6 +291,12 @@
  (and (match_code "const_double")
       (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)")))
 
+(define_constraint "Dt" 
+ "@internal
+  In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation"
+  (and (match_code "const_double")
+       (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)")))
+
 (define_memory_constraint "Ut"
  "@internal
   In ARM/Thumb-2 state an address valid for loading/storing opaque structure
@@ -327,6 +351,27 @@
  (and (match_code "mem")
       (match_test "REG_P (XEXP (op, 0))")))
 
+(define_memory_constraint "Uu"
+ "@internal
+  In Thumb state an address that is valid in 16bit encoding."
+ (and (match_code "mem")
+      (match_test "TARGET_THUMB
+		   && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+						   0)")))
+
+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
+; are actually LDM/STM instructions, so cannot be used to access unaligned
+; data.
+(define_memory_constraint "Uw"
+ "@internal
+  In Thumb state an address that is valid in 16bit encoding, and that can be
+  used for unaligned accesses."
+ (and (match_code "mem")
+      (match_test "TARGET_THUMB
+		   && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+						   0)
+		   && GET_CODE (XEXP (op, 0)) != POST_INC")))
+
 ;; We used to have constraint letters for S and R in ARM state, but
 ;; all uses of these now appear to have been removed.
 
--- a/src/gcc/config/arm/cortex-a15.md
+++ b/src/gcc/config/arm/cortex-a15.md
@@ -0,0 +1,186 @@
+;; ARM Cortex-A15 pipeline description
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;;
+;; Written by Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a15")
+
+;; The Cortex-A15 core is modelled as a triple issue pipeline that has
+;; the following dispatch units.
+;; 1. Two pipelines for simple integer operations: SX1, SX2
+;; 2. Two pipelines for Neon and FP data-processing operations: CX1, CX2
+;; 3. One pipeline for branch operations: BX
+;; 4. One pipeline for integer multiply and divide operations: MX
+;; 5. Two pipelines for load and store operations: LS1, LS2
+;;
+;; We can issue into three pipelines per-cycle.
+;;
+;; We assume that where we have unit pairs xx1 is always filled before xx2.
+
+;; The three issue units
+(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15")
+
+(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)")
+(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))")
+(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)")
+(final_presence_set "ca15_i1" "ca15_i0")
+(final_presence_set "ca15_i2" "ca15_i1")
+
+;; The main dispatch units
+(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15")
+(define_cpu_unit "ca15_cx1, ca15_cx2" "cortex_a15")
+(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15")
+(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15")
+
+(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)")
+
+;; The extended load-store pipeline
+(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15")
+
+;; The extended ALU pipeline
+(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15")
+(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "cortex_a15_alu" 2
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "alu")
+            (eq_attr "neon_type" "none")))
+  "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)")
+
+;; ALU ops with immediate shift
+(define_insn_reservation "cortex_a15_alu_shift" 3
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "alu_shift")
+            (eq_attr "neon_type" "none")))
+  "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\
+	       |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)")
+
+;; ALU ops with register controlled shift
+(define_insn_reservation "cortex_a15_alu_shift_reg" 3
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "alu_shift_reg")
+	    (eq_attr "neon_type" "none")))
+  "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\
+   |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\
+   |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)")
+
+;; Multiply Execution Unit:
+;;
+;; 32-bit multiplies
+(define_insn_reservation "cortex_a15_mult32" 3
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "mult")
+	    (and (eq_attr "neon_type" "none")
+		 (eq_attr "mul64" "no"))))
+  "ca15_issue1,ca15_mx")
+
+;; 64-bit multiplies
+(define_insn_reservation "cortex_a15_mult64" 4
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "mult")
+	    (and (eq_attr "neon_type" "none")
+		 (eq_attr "mul64" "yes"))))
+  "ca15_issue1,ca15_mx*2")
+
+;; Integer divide
+(define_insn_reservation "cortex_a15_udiv" 9
+  (and (eq_attr "tune" "cortexa15")
+       (eq_attr "insn" "udiv"))
+  "ca15_issue1,ca15_mx")
+
+(define_insn_reservation "cortex_a15_sdiv" 10
+  (and (eq_attr "tune" "cortexa15")
+       (eq_attr "insn" "sdiv"))
+  "ca15_issue1,ca15_mx")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "cortex_a15_block" 1
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "block")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue3")
+
+;; Branch execution Unit
+;;
+;; Branches take one issue slot.
+;; No latency as there is no result
+(define_insn_reservation "cortex_a15_branch" 0
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "branch")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue1,ca15_bx")
+
+
+;; We lie with calls.  They take up all issue slots, and form a block in the
+;; pipeline.  The result however is available the next cycle.
+;;
+;; Addition of new units requires this to be updated.
+(define_insn_reservation "cortex_a15_call" 1
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "call")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue3,\
+   ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx1+ca15_cx2+ca15_ls1+ca15_ls2,\
+   ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+ca15_sx2_shf\
+    +ca15_sx2_sat+ca15_ldr+ca15_str")
+
+;; Load-store execution Unit
+;;
+;; Loads of up to two words.
+(define_insn_reservation "cortex_a15_load1" 4
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "load_byte,load1,load2")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue1,ca15_ls,ca15_ldr,nothing")
+
+;; Loads of three or four words.
+(define_insn_reservation "cortex_a15_load3" 5
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "load3,load4")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing")
+
+;; Stores of up to two words.
+(define_insn_reservation "cortex_a15_store1" 0
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "store1,store2")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue1,ca15_ls,ca15_str")
+
+;; Stores of three or four words.
+(define_insn_reservation "cortex_a15_store3" 0
+  (and (eq_attr "tune" "cortexa15")
+       (and (eq_attr "type" "store3,store4")
+	    (eq_attr "neon_type" "none")))
+  "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str")
+
+;; Simple execution unit bypasses
+(define_bypass 1 "cortex_a15_alu"
+	       "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
+(define_bypass 2 "cortex_a15_alu_shift"
+	       "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
+(define_bypass 2 "cortex_a15_alu_shift_reg"
+	       "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg")
+(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3")
+(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3")
+(define_bypass 2 "cortex_a15_alu_shift_reg"
+	       "cortex_a15_load1,cortex_a15_load3")
--- a/src/gcc/config/arm/cortex-a9.md
+++ b/src/gcc/config/arm/cortex-a9.md
@@ -68,7 +68,8 @@
   "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")
 (define_reservation "cortex_a9_mac"
   "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")
-
+(define_reservation "cortex_a9_mult_long"
+  "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb")
 
 ;; Issue at the same time along the load store pipeline and
 ;; the VFP / Neon pipeline is not possible.
@@ -139,29 +140,35 @@
        (eq_attr "insn" "smlaxy"))
   "cortex_a9_mac16")
 
-
 (define_insn_reservation "cortex_a9_multiply" 4
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "insn" "mul"))
+       (eq_attr "insn" "mul,smmul,smmulr"))
        "cortex_a9_mult")
 
 (define_insn_reservation "cortex_a9_mac" 4
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "insn" "mla"))
+       (eq_attr "insn" "mla,smmla"))
        "cortex_a9_mac")
 
+(define_insn_reservation "cortex_a9_multiply_long" 5
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
+       "cortex_a9_mult_long")
+
 ;; An instruction with a result in E2 can be forwarded
 ;; to E2 or E1 or M1 or the load store unit in the next cycle.
 
 (define_bypass 1 "cortex_a9_dp"
                  "cortex_a9_dp_shift, cortex_a9_multiply,
  cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, 
+ cortex_a9_multiply_long")
 
 (define_bypass 2 "cortex_a9_dp_shift"
                  "cortex_a9_dp_shift, cortex_a9_multiply,
  cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
- cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4,
+ cortex_a9_multiply_long")
 
 ;; An instruction in the load store pipeline can provide
 ;; read access to a DP instruction in the P0 default pipeline
@@ -212,7 +219,7 @@
 
 (define_bypass 1
   "cortex_a9_fps"
-  "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
+  "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long")
 
 ;; Scheduling on the FP_ADD pipeline.
 (define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
--- a/src/gcc/config/arm/driver-arm.c
+++ b/src/gcc/config/arm/driver-arm.c
@@ -0,0 +1,149 @@
+/* Subroutines for the gcc driver.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "configargs.h"
+
+struct vendor_cpu {
+  const char *part_no;
+  const char *arch_name;
+  const char *cpu_name;
+};
+
+static struct vendor_cpu arm_cpu_table[] = {
+    {"0x926", "armv5te", "arm926ej-s"},
+    {"0xa26", "armv5te", "arm1026ej-s"},
+    {"0xb02", "armv6k", "mpcore"},
+    {"0xb36", "armv6j", "arm1136j-s"},
+    {"0xb56", "armv6t2", "arm1156t2-s"},
+    {"0xb76", "armv6zk", "arm1176jz-s"},
+    {"0xc05", "armv7-a", "cortex-a5"},
+    {"0xc08", "armv7-a", "cortex-a8"},
+    {"0xc09", "armv7-a", "cortex-a9"},
+    {"0xc0f", "armv7-a", "cortex-a15"},
+    {"0xc14", "armv7-r", "cortex-r4"},
+    {"0xc15", "armv7-r", "cortex-r5"},
+    {"0xc20", "armv6-m", "cortex-m0"},
+    {"0xc21", "armv6-m", "cortex-m1"},
+    {"0xc23", "armv7-m", "cortex-m3"},
+    {"0xc24", "armv7e-m", "cortex-m4"},
+    {NULL, NULL, NULL}
+};
+
+static struct {
+  const char *vendor_no;
+  const struct vendor_cpu *vendor_parts;
+} vendors[] = {
+    {"0x41", arm_cpu_table},
+    {NULL, NULL}
+};
+
+/* This will be called by the spec parser in gcc.c when it sees
+   a %:local_cpu_detect(args) construct.  Currently it will be called
+   with either "arch", "cpu" or "tune" as argument depending on if
+   -march=native, -mcpu=native or -mtune=native is to be substituted.
+
+   It returns a string containing new command line parameters to be
+   put at the place of the above two options, depending on what CPU
+   this is executed.  E.g. "-march=armv7-a" on a Cortex-A8 for
+   -march=native.  If the routine can't detect a known processor,
+   the -march or -mtune option is discarded.
+
+   ARGC and ARGV are set depending on the actual arguments given
+   in the spec.  */
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+  const char *val = NULL;
+  char buf[128];
+  FILE *f = NULL;
+  bool arch;
+  const struct vendor_cpu *cpu_table = NULL;
+
+  if (argc < 1)
+    goto not_found;
+
+  arch = strcmp (argv[0], "arch") == 0;
+  if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune"))
+    goto not_found;
+
+  f = fopen ("/proc/cpuinfo", "r");
+  if (f == NULL)
+    goto not_found;
+
+  while (fgets (buf, sizeof (buf), f) != NULL)
+    {
+      /* Ensure that CPU implementer is ARM (0x41).  */
+      if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0)
+	{
+	  int i;
+	  for (i = 0; vendors[i].vendor_no != NULL; i++)
+	    if (strstr (buf, vendors[i].vendor_no) != NULL)
+	      {
+		cpu_table = vendors[i].vendor_parts;
+		break;
+	      }
+	}
+
+      /* Detect arch/cpu.  */
+      if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0)
+	{
+	  int i;
+
+	  if (cpu_table == NULL)
+	    goto not_found;
+
+	  for (i = 0; cpu_table[i].part_no != NULL; i++)
+	    if (strstr (buf, cpu_table[i].part_no) != NULL)
+	      {
+		val = arch ? cpu_table[i].arch_name : cpu_table[i].cpu_name;
+		break;
+	      }
+	  break;
+	}
+    }
+
+  fclose (f);
+
+  if (val == NULL)
+    goto not_found;
+
+  return concat ("-m", argv[0], "=", val, NULL);
+
+not_found:
+  {
+    unsigned int i;
+    unsigned int opt;
+    const char *search[] = {NULL, "arch"};
+
+    if (f)
+      fclose (f);
+
+    search[0] = argv[0];
+    for (opt = 0; opt < ARRAY_SIZE (search); opt++)
+      for (i = 0; i < ARRAY_SIZE (configure_default_options); i++)
+	if (strcmp (configure_default_options[i].name, search[opt]) == 0)
+	  return concat ("-m", search[opt], "=",
+			 configure_default_options[i].value, NULL);
+    return NULL;
+  }
+}
--- a/src/gcc/config/arm/elf.h
+++ b/src/gcc/config/arm/elf.h
@@ -56,8 +56,7 @@
 #define ASM_SPEC "\
 %{mbig-endian:-EB} \
 %{mlittle-endian:-EL} \
-%{mcpu=*:-mcpu=%*} \
-%{march=*:-march=%*} \
+%(asm_cpu_spec) \
 %{mapcs-*:-mapcs-%*} \
 %(subtarget_asm_float_spec) \
 %{mthumb-interwork:-mthumb-interwork} \
--- a/src/gcc/config/arm/iterators.md
+++ b/src/gcc/config/arm/iterators.md
@@ -33,6 +33,15 @@
 ;; A list of integer modes that are up to one word long
 (define_mode_iterator QHSI [QI HI SI])
 
+;; A list of integer modes that are less than a word
+(define_mode_iterator NARROW [QI HI])
+
+;; A list of all the integer modes upto 64bit
+(define_mode_iterator QHSD [QI HI SI DI])
+
+;; A list of the 32bit and 64bit integer modes
+(define_mode_iterator SIDI [SI DI])
+
 ;; Integer element sizes implemented by IWMMXT.
 (define_mode_iterator VMMX [V2SI V4HI V8QI])
 
@@ -194,24 +203,22 @@
 
 ;; Mode of pair of elements for each vector mode, to define transfer
 ;; size for structure lane/dup loads and stores.
-(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
-                  (V4HI "SI") (V8HI "SI")
+(define_mode_attr V_two_elem [(V8QI "HI")   (V16QI "HI")
+                              (V4HI "SI")   (V8HI "SI")
                               (V2SI "V2SI") (V4SI "V2SI")
                               (V2SF "V2SF") (V4SF "V2SF")
                               (DI "V2DI")   (V2DI "V2DI")])
 
 ;; Similar, for three elements.
-;; ??? Should we define extra modes so that sizes of all three-element
-;; accesses can be accurately represented?
-(define_mode_attr V_three_elem [(V8QI "SI")   (V16QI "SI")
-                    (V4HI "V4HI") (V8HI "V4HI")
-                                (V2SI "V4SI") (V4SI "V4SI")
-                                (V2SF "V4SF") (V4SF "V4SF")
-                                (DI "EI")     (V2DI "EI")])
+(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
+                                (V4HI "BLK") (V8HI "BLK")
+                                (V2SI "BLK") (V4SI "BLK")
+                                (V2SF "BLK") (V4SF "BLK")
+                                (DI "EI")    (V2DI "EI")])
 
 ;; Similar, for four elements.
 (define_mode_attr V_four_elem [(V8QI "SI")   (V16QI "SI")
-                   (V4HI "V4HI") (V8HI "V4HI")
+                               (V4HI "V4HI") (V8HI "V4HI")
                                (V2SI "V4SI") (V4SI "V4SI")
                                (V2SF "V4SF") (V4SF "V4SF")
                                (DI "OI")     (V2DI "OI")])
@@ -381,10 +388,17 @@
 (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
 (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
 				      (QI "&& arm_arch6")])
-(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
+(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand")
 				   (HI "nonimmediate_operand")
 				   (QI "nonimmediate_operand")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
+				   (HI "nonimmediate_operand")
+				   (QI "arm_reg_or_extendqisi_mem_op")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+
+;; Mode attribute for vshll.
+(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
 
 ;;----------------------------------------------------------------------------
 ;; Code attributes
--- a/src/gcc/config/arm/linux-atomic-64bit.c
+++ b/src/gcc/config/arm/linux-atomic-64bit.c
@@ -0,0 +1,166 @@
+/* 64bit Linux-specific atomic operations for ARM EABI.
+   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Based on linux-atomic.c
+
+   64 bit additions david.gilbert@linaro.org
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* 64bit helper functions for atomic operations; the compiler will
+   call these when the code is compiled for a CPU without ldrexd/strexd.
+   (If the CPU had those then the compiler inlines the operation).
+
+   These helpers require a kernel helper that's only present on newer
+   kernels; we check for that in an init section and bail out rather
+   unceremoneously.  */
+
+extern unsigned int __write (int fd, const void *buf, unsigned int count);
+extern void abort (void);
+
+/* Kernel helper for compare-and-exchange.  */
+typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
+					const long long* newval,
+					long long *ptr);
+#define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *) 0xffff0f60)
+
+/* Kernel helper page version number.  */
+#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+
+/* Check that the kernel has a new enough version at load.  */
+static void __check_for_sync8_kernelhelper (void)
+{
+  if (__kernel_helper_version < 5)
+    {
+      const char err[] = "A newer kernel is required to run this binary. "
+				"(__kernel_cmpxchg64 helper)\n";
+      /* At this point we need a way to crash with some information
+	 for the user - I'm not sure I can rely on much else being
+	 available at this point, so do the same as generic-morestack.c
+	 write () and abort ().  */
+      __write (2 /* stderr.  */, err, sizeof (err));
+      abort ();
+    }
+};
+
+static void (*__sync8_kernelhelper_inithook[]) (void)
+		__attribute__ ((used, section (".init_array"))) = {
+  &__check_for_sync8_kernelhelper
+};
+
+#define HIDDEN __attribute__ ((visibility ("hidden")))
+
+#define FETCH_AND_OP_WORD64(OP, PFX_OP, INF_OP)			\
+  long long HIDDEN						\
+  __sync_fetch_and_##OP##_8 (long long *ptr, long long val)	\
+  {								\
+    int failure;						\
+    long long tmp,tmp2;						\
+								\
+    do {							\
+      tmp = *ptr;						\
+      tmp2 = PFX_OP (tmp INF_OP val);				\
+      failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr);		\
+    } while (failure != 0);					\
+								\
+    return tmp;							\
+  }
+
+FETCH_AND_OP_WORD64 (add,   , +)
+FETCH_AND_OP_WORD64 (sub,   , -)
+FETCH_AND_OP_WORD64 (or,    , |)
+FETCH_AND_OP_WORD64 (and,   , &)
+FETCH_AND_OP_WORD64 (xor,   , ^)
+FETCH_AND_OP_WORD64 (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+   subword-sized quantities.  */
+
+#define OP_AND_FETCH_WORD64(OP, PFX_OP, INF_OP)			\
+  long long HIDDEN						\
+  __sync_##OP##_and_fetch_8 (long long *ptr, long long val)	\
+  {								\
+    int failure;						\
+    long long tmp,tmp2;						\
+								\
+    do {							\
+      tmp = *ptr;						\
+      tmp2 = PFX_OP (tmp INF_OP val);				\
+      failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr);		\
+    } while (failure != 0);					\
+								\
+    return tmp2;						\
+  }
+
+OP_AND_FETCH_WORD64 (add,   , +)
+OP_AND_FETCH_WORD64 (sub,   , -)
+OP_AND_FETCH_WORD64 (or,    , |)
+OP_AND_FETCH_WORD64 (and,   , &)
+OP_AND_FETCH_WORD64 (xor,   , ^)
+OP_AND_FETCH_WORD64 (nand, ~, &)
+
+long long HIDDEN
+__sync_val_compare_and_swap_8 (long long *ptr, long long oldval,
+				long long newval)
+{
+  int failure;
+  long long actual_oldval;
+
+  while (1)
+    {
+      actual_oldval = *ptr;
+
+      if (__builtin_expect (oldval != actual_oldval, 0))
+	return actual_oldval;
+
+      failure = __kernel_cmpxchg64 (&actual_oldval, &newval, ptr);
+
+      if (__builtin_expect (!failure, 1))
+	return oldval;
+    }
+}
+
+typedef unsigned char bool;
+
+bool HIDDEN
+__sync_bool_compare_and_swap_8 (long long *ptr, long long oldval,
+				 long long newval)
+{
+  int failure = __kernel_cmpxchg64 (&oldval, &newval, ptr);
+  return (failure == 0);
+}
+
+long long HIDDEN
+__sync_lock_test_and_set_8 (long long *ptr, long long val)
+{
+  int failure;
+  long long oldval;
+
+  do {
+    oldval = *ptr;
+    failure = __kernel_cmpxchg64 (&oldval, &val, ptr);
+  } while (failure != 0);
+
+  return oldval;
+}
--- a/src/gcc/config/arm/linux-atomic.c
+++ b/src/gcc/config/arm/linux-atomic.c
@@ -32,8 +32,8 @@
 #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0)
 
 /* Note: we implement byte, short and int versions of atomic operations using
-   the above kernel helpers, but there is no support for "long long" (64-bit)
-   operations as yet.  */
+   the above kernel helpers; see linux-atomic-64bit.c for "long long" (64-bit)
+   operations.  */
 
 #define HIDDEN __attribute__ ((visibility ("hidden")))
 
@@ -273,6 +273,7 @@
     *ptr = 0;								\
   }
 
+SYNC_LOCK_RELEASE (long long,   8)
 SYNC_LOCK_RELEASE (int,   4)
 SYNC_LOCK_RELEASE (short, 2)
 SYNC_LOCK_RELEASE (char,  1)
--- a/src/gcc/config/arm/linux-eabi.h
+++ b/src/gcc/config/arm/linux-eabi.h
@@ -32,7 +32,8 @@
   while (false)
 
 /* We default to a soft-float ABI so that binaries can run on all
-   target hardware.  */
+   target hardware.  If you override this to use the hard-float ABI then
+   change the setting of GLIBC_DYNAMIC_LINKER_DEFAULT as well.  */
 #undef  TARGET_DEFAULT_FLOAT_ABI
 #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
 
@@ -59,10 +60,23 @@
 #undef  SUBTARGET_EXTRA_LINK_SPEC
 #define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION
 
-/* Use ld-linux.so.3 so that it will be possible to run "classic"
-   GNU/Linux binaries on an EABI system.  */
+/* GNU/Linux on ARM currently supports three dynamic linkers:
+   - ld-linux.so.2 - for the legacy ABI
+   - ld-linux.so.3 - for the EABI-derived soft-float ABI
+   - ld-linux-armhf.so.3 - for the EABI-derived hard-float ABI.
+   All the dynamic linkers live in /lib.
+   We default to soft-float, but this can be overridden by changing both
+   GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI.  */
+
 #undef  GLIBC_DYNAMIC_LINKER
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.3"
+#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3"
+#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3"
+#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT
+
+#define GLIBC_DYNAMIC_LINKER \
+   "%{mfloat-abi=hard:" GLIBC_DYNAMIC_LINKER_HARD_FLOAT "} \
+    %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \
+    %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}"
 
 /* At this point, bpabi.h will have clobbered LINK_SPEC.  We want to
    use the GNU/Linux version, not the generic BPABI version.  */
--- a/src/gcc/config/arm/neon.md
+++ b/src/gcc/config/arm/neon.md
@@ -783,30 +783,57 @@
 
 (define_insn "orn<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
-	(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
-		 (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+	(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
+		 (match_operand:VDQ 1 "s_register_operand" "w")))]
   "TARGET_NEON"
   "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "neon_type" "neon_int_1")]
 )
 
-(define_insn "orndi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
-	(ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0")
-	         (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))]
+;; TODO: investigate whether we should disable 
+;; this and bicdi3_neon for the A8 in line with the other
+;; changes above. 
+(define_insn_and_split "orndi3_neon"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
+	(ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
+		(match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
   "TARGET_NEON"
   "@
    vorn\t%P0, %P1, %P2
    #
+   #
    #"
-  [(set_attr "neon_type" "neon_int_1,*,*")
-   (set_attr "length" "*,8,8")]
+  "reload_completed && 
+   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
+  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
+   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        operands[3] = gen_highpart (SImode, operands[0]);
+        operands[0] = gen_lowpart (SImode, operands[0]);
+        operands[4] = gen_highpart (SImode, operands[2]);
+        operands[2] = gen_lowpart (SImode, operands[2]);
+        operands[5] = gen_highpart (SImode, operands[1]);
+        operands[1] = gen_lowpart (SImode, operands[1]);
+      }
+    else
+      {
+        emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
+        emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
+        DONE;
+      }
+  }"
+  [(set_attr "neon_type" "neon_int_1,*,*,*")
+   (set_attr "length" "*,16,8,8")
+   (set_attr "arch" "any,a,t2,t2")]
 )
 
 (define_insn "bic<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
-	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
-		  (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+	(and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
+		 (match_operand:VDQ 1 "s_register_operand" "w")))]
   "TARGET_NEON"
   "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "neon_type" "neon_int_1")]
@@ -929,17 +956,59 @@
 ; SImode elements.
 
 (define_insn "vashl<mode>3"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
+	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
+		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
+  "TARGET_NEON"
+  {
+    switch (which_alternative)
+      {
+        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
+                         			    <MODE>mode,
+						    VALID_NEON_QREG_MODE (<MODE>mode),
+						    true);
+        default: gcc_unreachable ();
+      }
+  }
+  [(set (attr "neon_type")
+      (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                    (const_string "neon_vshl_ddd")
+                    (const_string "neon_shift_3")))]
+)
+
+(define_insn "vashr<mode>3_imm"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
-	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
-		      (match_operand:VDQIW 2 "s_register_operand" "w")))]
+	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
   "TARGET_NEON"
-  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  {
+    return neon_output_shift_immediate ("vshr", 's', &operands[2],
+					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
+					false);
+  }
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_vshl_ddd")
                     (const_string "neon_shift_3")))]
 )
 
+(define_insn "vlshr<mode>3_imm"
+  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
+  "TARGET_NEON"
+  {
+    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
+					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
+					false);
+  }              
+  [(set (attr "neon_type")
+	(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+		      (const_string "neon_vshl_ddd")
+		      (const_string "neon_shift_3")))]
+)
+
 ; Used for implementing logical shift-right, which is a left-shift by a negative
 ; amount, with signed operands. This is essentially the same as ashl<mode>3
 ; above, but using an unspec in case GCC tries anything tricky with negative
@@ -977,28 +1046,34 @@
 (define_expand "vashr<mode>3"
   [(set (match_operand:VDQIW 0 "s_register_operand" "")
 	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
-			(match_operand:VDQIW 2 "s_register_operand" "")))]
+			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
   "TARGET_NEON"
 {
   rtx neg = gen_reg_rtx (<MODE>mode);
-
-  emit_insn (gen_neg<mode>2 (neg, operands[2]));
-  emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
-
+  if (REG_P (operands[2]))
+    {
+      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+    }
+  else
+    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
   DONE;
 })
 
 (define_expand "vlshr<mode>3"
   [(set (match_operand:VDQIW 0 "s_register_operand" "")
 	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
-			(match_operand:VDQIW 2 "s_register_operand" "")))]
+			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
   "TARGET_NEON"
 {
   rtx neg = gen_reg_rtx (<MODE>mode);
-
-  emit_insn (gen_neg<mode>2 (neg, operands[2]));
-  emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
-
+  if (REG_P (operands[2]))
+    {
+      emit_insn (gen_neg<mode>2 (neg, operands[2]));
+      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+    }
+  else
+    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
   DONE;
 })
 
@@ -1160,66 +1235,14 @@
                     (const_string "neon_int_1") (const_string "neon_int_5")))]
 )
 
-; FIXME: We wouldn't need the following insns if we could write subregs of
-; vector registers. Make an attempt at removing unnecessary moves, though
-; we're really at the mercy of the register allocator.
-
-(define_insn "neon_move_lo_quad_<mode>"
-  [(set (match_operand:ANY128 0 "s_register_operand" "+w")
-        (vec_concat:ANY128
-          (match_operand:<V_HALF> 1 "s_register_operand" "w")
-          (vec_select:<V_HALF> 
-		(match_dup 0)
-	        (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%e0, %P1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_move_hi_quad_<mode>"
-  [(set (match_operand:ANY128 0 "s_register_operand" "+w")
-        (vec_concat:ANY128
-          (vec_select:<V_HALF>
-		(match_dup 0)
-	        (match_operand:ANY128 2 "vect_par_constant_low" ""))
-          (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
-	   
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%f0, %P1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
 (define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand" "")
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
 {
-  rtvec v = rtvec_alloc (<V_mode_nunits>/2);
-  rtx t1;
-  int i;
-
-  for (i=0; i < (<V_mode_nunits>/2); i++)
-     RTVEC_ELT (v, i) = GEN_INT (i);
-
-  t1 = gen_rtx_PARALLEL (<MODE>mode, v);
-  emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
-
+  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
+				       GET_MODE_SIZE (<V_HALF>mode)),
+		  operands[1]);
   DONE;
 })
 
@@ -1228,16 +1251,9 @@
   (match_operand:<V_HALF> 1 "s_register_operand" "")]
  "TARGET_NEON"
 {
-  rtvec v = rtvec_alloc (<V_mode_nunits>/2);
-  rtx t1;
-  int i;
-
-  for (i=0; i < (<V_mode_nunits>/2); i++)
-     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
-
-  t1 = gen_rtx_PARALLEL (<MODE>mode, v);
-  emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
-
+  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
+				       <MODE>mode, 0),
+		  operands[1]);
   DONE;
 })
 
@@ -2875,183 +2891,27 @@
    (set_attr "neon_type" "neon_bp_simple")]
 )
 
-(define_insn "neon_vget_highv16qi"
-  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
-	(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
-                         (parallel [(const_int 8) (const_int 9)
-			            (const_int 10) (const_int 11)
-				    (const_int 12) (const_int 13)
-				    (const_int 14) (const_int 15)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv8hi"
-  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
-	(vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 4) (const_int 5)
-			            (const_int 6) (const_int 7)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4si"
-  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
-	(vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv4sf"
-  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
-	(vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
-	                 (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_highv2di"
-  [(set (match_operand:DI 0 "s_register_operand" "=w")
-	(vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
-	               (parallel [(const_int 1)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src + 2)
-    return "vmov\t%P0, %f1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv16qi"
-  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
-	(vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
-                         (parallel [(const_int 0) (const_int 1)
-			            (const_int 2) (const_int 3)
-				    (const_int 4) (const_int 5)
-				    (const_int 6) (const_int 7)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv8hi"
-  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
-	(vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 0) (const_int 1)
-			            (const_int 2) (const_int 3)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4si"
-  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
-	(vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
-	                 (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_NEON"
-{
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vget_lowv4sf"
-  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
-	(vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
-	                 (parallel [(const_int 0) (const_int 1)])))]
+(define_expand "neon_vget_high<mode>"
+  [(match_operand:<V_HALF> 0 "s_register_operand")
+   (match_operand:VQX 1 "s_register_operand")]
   "TARGET_NEON"
 {
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
+  emit_move_insn (operands[0],
+		  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+				       GET_MODE_SIZE (<V_HALF>mode)));
+  DONE;
+})
 
-(define_insn "neon_vget_lowv2di"
-  [(set (match_operand:DI 0 "s_register_operand" "=w")
-	(vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
-	               (parallel [(const_int 0)])))]
+(define_expand "neon_vget_low<mode>"
+  [(match_operand:<V_HALF> 0 "s_register_operand")
+   (match_operand:VQX 1 "s_register_operand")]
   "TARGET_NEON"
 {
-  int dest = REGNO (operands[0]);
-  int src = REGNO (operands[1]);
-
-  if (dest != src)
-    return "vmov\t%P0, %e1";
-  else
-    return "";
-}
-  [(set_attr "neon_type" "neon_bp_simple")]
-)
+  emit_move_insn (operands[0],
+		  simplify_gen_subreg (<V_HALF>mode, operands[1],
+				       <MODE>mode, 0));
+  DONE;
+})
 
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
@@ -4248,18 +4108,24 @@
   DONE;
 })
 
+(define_expand "vec_load_lanes<mode><mode>"
+  [(set (match_operand:VDQX 0 "s_register_operand")
+        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
+                     UNSPEC_VLD1))]
+  "TARGET_NEON")
+
 (define_insn "neon_vld1<mode>"
   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
-        (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))]
+        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1))]
   "TARGET_NEON"
-  "vld1.<V_sz_elem>\t%h0, [%1]"
+  "vld1.<V_sz_elem>\t%h0, %A1"
   [(set_attr "neon_type" "neon_vld1_1_2_regs")]
 )
 
 (define_insn "neon_vld1_lane<mode>"
   [(set (match_operand:VDX 0 "s_register_operand" "=w")
-        (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                      (match_operand:VDX 2 "s_register_operand" "0")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VLD1_LANE))]
@@ -4270,9 +4136,9 @@
   if (lane < 0 || lane >= max)
     error ("lane out of range");
   if (max == 1)
-    return "vld1.<V_sz_elem>\t%P0, [%1]";
+    return "vld1.<V_sz_elem>\t%P0, %A1";
   else
-    return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
+    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
@@ -4282,7 +4148,7 @@
 
 (define_insn "neon_vld1_lane<mode>"
   [(set (match_operand:VQX 0 "s_register_operand" "=w")
-        (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                      (match_operand:VQX 2 "s_register_operand" "0")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VLD1_LANE))]
@@ -4301,9 +4167,9 @@
     }
   operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
   if (max == 2)
-    return "vld1.<V_sz_elem>\t%P0, [%1]";
+    return "vld1.<V_sz_elem>\t%P0, %A1";
   else
-    return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
+    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
@@ -4313,14 +4179,14 @@
 
 (define_insn "neon_vld1_dup<mode>"
   [(set (match_operand:VDX 0 "s_register_operand" "=w")
-        (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))]
+        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1_DUP))]
   "TARGET_NEON"
 {
   if (GET_MODE_NUNITS (<MODE>mode) > 1)
-    return "vld1.<V_sz_elem>\t{%P0[]}, [%1]";
+    return "vld1.<V_sz_elem>\t{%P0[]}, %A1";
   else
-    return "vld1.<V_sz_elem>\t%h0, [%1]";
+    return "vld1.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -4330,14 +4196,14 @@
 
 (define_insn "neon_vld1_dup<mode>"
   [(set (match_operand:VQX 0 "s_register_operand" "=w")
-        (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))]
+        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1_DUP))]
   "TARGET_NEON"
 {
   if (GET_MODE_NUNITS (<MODE>mode) > 2)
-    return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
+    return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
   else
-    return "vld1.<V_sz_elem>\t%h0, [%1]";
+    return "vld1.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -4345,16 +4211,22 @@
                     (const_string "neon_vld1_1_2_regs")))]
 )
 
+(define_expand "vec_store_lanes<mode><mode>"
+  [(set (match_operand:VDQX 0 "neon_struct_operand")
+	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
+		     UNSPEC_VST1))]
+  "TARGET_NEON")
+
 (define_insn "neon_vst1<mode>"
-  [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
 	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
 		     UNSPEC_VST1))]
   "TARGET_NEON"
-  "vst1.<V_sz_elem>\t%h1, [%0]"
+  "vst1.<V_sz_elem>\t%h1, %A0"
   [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
 
 (define_insn "neon_vst1_lane<mode>"
-  [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
 	(vec_select:<V_elem>
 	  (match_operand:VDX 1 "s_register_operand" "w")
 	  (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
@@ -4365,9 +4237,9 @@
   if (lane < 0 || lane >= max)
     error ("lane out of range");
   if (max == 1)
-    return "vst1.<V_sz_elem>\t{%P1}, [%0]";
+    return "vst1.<V_sz_elem>\t{%P1}, %A0";
   else
-    return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
+    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1))
@@ -4375,7 +4247,7 @@
                     (const_string "neon_vst1_vst2_lane")))])
 
 (define_insn "neon_vst1_lane<mode>"
-  [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
         (vec_select:<V_elem>
            (match_operand:VQX 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
@@ -4394,24 +4266,31 @@
     }
   operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
   if (max == 2)
-    return "vst1.<V_sz_elem>\t{%P1}, [%0]";
+    return "vst1.<V_sz_elem>\t{%P1}, %A0";
   else
-    return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
+    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
 }
   [(set_attr "neon_type" "neon_vst1_vst2_lane")]
 )
 
+(define_expand "vec_load_lanesti<mode>"
+  [(set (match_operand:TI 0 "s_register_operand")
+        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
+                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_VLD2))]
+  "TARGET_NEON")
+
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
-        (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2))]
   "TARGET_NEON"
 {
   if (<V_sz_elem> == 64)
-    return "vld1.64\t%h0, [%1]";
+    return "vld1.64\t%h0, %A1";
   else
-    return "vld2.<V_sz_elem>\t%h0, [%1]";
+    return "vld2.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4419,18 +4298,25 @@
                     (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))]
 )
 
+(define_expand "vec_load_lanesoi<mode>"
+  [(set (match_operand:OI 0 "s_register_operand")
+        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_VLD2))]
+  "TARGET_NEON")
+
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
-        (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2))]
   "TARGET_NEON"
-  "vld2.<V_sz_elem>\t%h0, [%1]"
+  "vld2.<V_sz_elem>\t%h0, %A1"
   [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")])
 
 (define_insn "neon_vld2_lane<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
-        (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:TI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4447,7 +4333,7 @@
   ops[1] = gen_rtx_REG (DImode, regno + 2);
   ops[2] = operands[1];
   ops[3] = operands[3];
-  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
+  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vld1_vld2_lane")]
@@ -4455,7 +4341,7 @@
 
 (define_insn "neon_vld2_lane<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
-        (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:OI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4477,7 +4363,7 @@
   ops[1] = gen_rtx_REG (DImode, regno + 4);
   ops[2] = operands[1];
   ops[3] = GEN_INT (lane);
-  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
+  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vld1_vld2_lane")]
@@ -4485,15 +4371,15 @@
 
 (define_insn "neon_vld2_dup<mode>"
   [(set (match_operand:TI 0 "s_register_operand" "=w")
-        (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2_DUP))]
   "TARGET_NEON"
 {
   if (GET_MODE_NUNITS (<MODE>mode) > 1)
-    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
+    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
   else
-    return "vld1.<V_sz_elem>\t%h0, [%1]";
+    return "vld1.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -4501,17 +4387,24 @@
                     (const_string "neon_vld1_1_2_regs")))]
 )
 
+(define_expand "vec_store_lanesti<mode>"
+  [(set (match_operand:TI 0 "neon_struct_operand")
+	(unspec:TI [(match_operand:TI 1 "s_register_operand")
+                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VST2))]
+  "TARGET_NEON")
+
 (define_insn "neon_vst2<mode>"
-  [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
         (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST2))]
   "TARGET_NEON"
 {
   if (<V_sz_elem> == 64)
-    return "vst1.64\t%h1, [%0]";
+    return "vst1.64\t%h1, %A0";
   else
-    return "vst2.<V_sz_elem>\t%h1, [%0]";
+    return "vst2.<V_sz_elem>\t%h1, %A0";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4519,18 +4412,25 @@
                     (const_string "neon_vst1_1_2_regs_vst2_2_regs")))]
 )
 
+(define_expand "vec_store_lanesoi<mode>"
+  [(set (match_operand:OI 0 "neon_struct_operand")
+	(unspec:OI [(match_operand:OI 1 "s_register_operand")
+                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VST2))]
+  "TARGET_NEON")
+
 (define_insn "neon_vst2<mode>"
-  [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
 	(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
 		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
 		   UNSPEC_VST2))]
   "TARGET_NEON"
-  "vst2.<V_sz_elem>\t%h1, [%0]"
+  "vst2.<V_sz_elem>\t%h1, %A0"
   [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]
 )
 
 (define_insn "neon_vst2_lane<mode>"
-  [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
 	(unspec:<V_two_elem>
 	  [(match_operand:TI 1 "s_register_operand" "w")
 	   (match_operand:SI 2 "immediate_operand" "i")
@@ -4548,14 +4448,14 @@
   ops[1] = gen_rtx_REG (DImode, regno);
   ops[2] = gen_rtx_REG (DImode, regno + 2);
   ops[3] = operands[2];
-  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
+  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vst1_vst2_lane")]
 )
 
 (define_insn "neon_vst2_lane<mode>"
-  [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
         (unspec:<V_two_elem>
            [(match_operand:OI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
@@ -4578,23 +4478,30 @@
   ops[1] = gen_rtx_REG (DImode, regno);
   ops[2] = gen_rtx_REG (DImode, regno + 4);
   ops[3] = GEN_INT (lane);
-  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
+  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vst1_vst2_lane")]
 )
 
+(define_expand "vec_load_lanesei<mode>"
+  [(set (match_operand:EI 0 "s_register_operand")
+        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
+                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_VLD3))]
+  "TARGET_NEON")
+
 (define_insn "neon_vld3<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
-        (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3))]
   "TARGET_NEON"
 {
   if (<V_sz_elem> == 64)
-    return "vld1.64\t%h0, [%1]";
+    return "vld1.64\t%h0, %A1";
   else
-    return "vld3.<V_sz_elem>\t%h0, [%1]";
+    return "vld3.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4602,28 +4509,36 @@
                     (const_string "neon_vld3_vld4")))]
 )
 
+(define_expand "vec_load_lanesci<mode>"
+  [(match_operand:CI 0 "s_register_operand")
+   (match_operand:CI 1 "neon_struct_operand")
+   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_NEON"
+{
+  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 (define_expand "neon_vld3<mode>"
-  [(match_operand:CI 0 "s_register_operand" "=w")
-   (match_operand:SI 1 "s_register_operand" "+r")
+  [(match_operand:CI 0 "s_register_operand")
+   (match_operand:CI 1 "neon_struct_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
-  emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[0],
-                                    operands[1], operands[1]));
-  emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0],
-                                    operands[1], operands[1]));
+  rtx mem;
+
+  mem = adjust_address (operands[1], EImode, 0);
+  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
+  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
+  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
   DONE;
 })
 
 (define_insn "neon_vld3qa<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=w")
-        (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2"))
-                    (match_operand:CI 1 "s_register_operand" "0")
+        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VLD3A))
-   (set (match_operand:SI 2 "s_register_operand" "=r")
-        (plus:SI (match_dup 3)
-		 (const_int 24)))]
+                   UNSPEC_VLD3A))]
   "TARGET_NEON"
 {
   int regno = REGNO (operands[0]);
@@ -4631,8 +4546,8 @@
   ops[0] = gen_rtx_REG (DImode, regno);
   ops[1] = gen_rtx_REG (DImode, regno + 4);
   ops[2] = gen_rtx_REG (DImode, regno + 8);
-  ops[3] = operands[2];
-  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
+  ops[3] = operands[1];
+  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vld3_vld4")]
@@ -4640,13 +4555,10 @@
 
 (define_insn "neon_vld3qb<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=w")
-        (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2"))
-                    (match_operand:CI 1 "s_register_operand" "0")
+        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
+                    (match_operand:CI 2 "s_register_operand" "0")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VLD3B))
-   (set (match_operand:SI 2 "s_register_operand" "=r")
-        (plus:SI (match_dup 3)
-		 (const_int 24)))]
+                   UNSPEC_VLD3B))]
   "TARGET_NEON"
 {
   int regno = REGNO (operands[0]);
@@ -4654,8 +4566,8 @@
   ops[0] = gen_rtx_REG (DImode, regno + 2);
   ops[1] = gen_rtx_REG (DImode, regno + 6);
   ops[2] = gen_rtx_REG (DImode, regno + 10);
-  ops[3] = operands[2];
-  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
+  ops[3] = operands[1];
+  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vld3_vld4")]
@@ -4663,7 +4575,7 @@
 
 (define_insn "neon_vld3_lane<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
-        (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:EI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4681,7 +4593,7 @@
   ops[2] = gen_rtx_REG (DImode, regno + 4);
   ops[3] = operands[1];
   ops[4] = operands[3];
-  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
+  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3",
                    ops);
   return "";
 }
@@ -4690,7 +4602,7 @@
 
 (define_insn "neon_vld3_lane<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=w")
-        (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:CI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4713,7 +4625,7 @@
   ops[2] = gen_rtx_REG (DImode, regno + 8);
   ops[3] = operands[1];
   ops[4] = GEN_INT (lane);
-  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
+  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3",
                    ops);
   return "";
 }
@@ -4722,7 +4634,7 @@
 
 (define_insn "neon_vld3_dup<mode>"
   [(set (match_operand:EI 0 "s_register_operand" "=w")
-        (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3_DUP))]
   "TARGET_NEON"
@@ -4735,91 +4647,106 @@
       ops[1] = gen_rtx_REG (DImode, regno + 2);
       ops[2] = gen_rtx_REG (DImode, regno + 4);
       ops[3] = operands[1];
-      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops);
+      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %A3", ops);
       return "";
     }
   else
-    return "vld1.<V_sz_elem>\t%h0, [%1]";
+    return "vld1.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                     (const_string "neon_vld3_vld4_all_lanes")
                     (const_string "neon_vld1_1_2_regs")))])
 
+(define_expand "vec_store_lanesei<mode>"
+  [(set (match_operand:EI 0 "neon_struct_operand")
+	(unspec:EI [(match_operand:EI 1 "s_register_operand")
+                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VST3))]
+  "TARGET_NEON")
+
 (define_insn "neon_vst3<mode>"
-  [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
         (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST3))]
   "TARGET_NEON"
 {
   if (<V_sz_elem> == 64)
-    return "vst1.64\t%h1, [%0]";
+    return "vst1.64\t%h1, %A0";
   else
-    return "vst3.<V_sz_elem>\t%h1, [%0]";
+    return "vst3.<V_sz_elem>\t%h1, %A0";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                     (const_string "neon_vst1_1_2_regs_vst2_2_regs")
                     (const_string "neon_vst2_4_regs_vst3_vst4")))])
 
+(define_expand "vec_store_lanesci<mode>"
+  [(match_operand:CI 0 "neon_struct_operand")
+   (match_operand:CI 1 "s_register_operand")
+   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_NEON"
+{
+  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 (define_expand "neon_vst3<mode>"
-  [(match_operand:SI 0 "s_register_operand" "+r")
-   (match_operand:CI 1 "s_register_operand" "w")
+  [(match_operand:CI 0 "neon_struct_operand")
+   (match_operand:CI 1 "s_register_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
-  emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1]));
-  emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1]));
+  rtx mem;
+
+  mem = adjust_address (operands[0], EImode, 0);
+  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
+  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
+  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
   DONE;
 })
 
 (define_insn "neon_vst3qa<mode>"
-  [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
-        (unspec:EI [(match_operand:CI 2 "s_register_operand" "w")
+  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
+        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VST3A))
-   (set (match_operand:SI 0 "s_register_operand" "=r")
-        (plus:SI (match_dup 1)
-		 (const_int 24)))]
+                   UNSPEC_VST3A))]
   "TARGET_NEON"
 {
-  int regno = REGNO (operands[2]);
+  int regno = REGNO (operands[1]);
   rtx ops[4];
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (DImode, regno);
   ops[2] = gen_rtx_REG (DImode, regno + 4);
   ops[3] = gen_rtx_REG (DImode, regno + 8);
-  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
+  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
 )
 
 (define_insn "neon_vst3qb<mode>"
-  [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
-        (unspec:EI [(match_operand:CI 2 "s_register_operand" "w")
+  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
+        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VST3B))
-   (set (match_operand:SI 0 "s_register_operand" "=r")
-        (plus:SI (match_dup 1)
-		 (const_int 24)))]
+                   UNSPEC_VST3B))]
   "TARGET_NEON"
 {
-  int regno = REGNO (operands[2]);
+  int regno = REGNO (operands[1]);
   rtx ops[4];
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (DImode, regno + 2);
   ops[2] = gen_rtx_REG (DImode, regno + 6);
   ops[3] = gen_rtx_REG (DImode, regno + 10);
-  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
+  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
 )
 
 (define_insn "neon_vst3_lane<mode>"
-  [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
         (unspec:<V_three_elem>
            [(match_operand:EI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
@@ -4838,7 +4765,7 @@
   ops[2] = gen_rtx_REG (DImode, regno + 2);
   ops[3] = gen_rtx_REG (DImode, regno + 4);
   ops[4] = operands[2];
-  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
+  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0",
                    ops);
   return "";
 }
@@ -4846,7 +4773,7 @@
 )
 
 (define_insn "neon_vst3_lane<mode>"
-  [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
         (unspec:<V_three_elem>
            [(match_operand:CI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
@@ -4870,23 +4797,30 @@
   ops[2] = gen_rtx_REG (DImode, regno + 4);
   ops[3] = gen_rtx_REG (DImode, regno + 8);
   ops[4] = GEN_INT (lane);
-  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
+  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %A0",
                    ops);
   return "";
 }
 [(set_attr "neon_type" "neon_vst3_vst4_lane")])
 
+(define_expand "vec_load_lanesoi<mode>"
+  [(set (match_operand:OI 0 "s_register_operand")
+        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
+                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		   UNSPEC_VLD4))]
+  "TARGET_NEON")
+
 (define_insn "neon_vld4<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
-        (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4))]
   "TARGET_NEON"
 {
   if (<V_sz_elem> == 64)
-    return "vld1.64\t%h0, [%1]";
+    return "vld1.64\t%h0, %A1";
   else
-    return "vld4.<V_sz_elem>\t%h0, [%1]";
+    return "vld4.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -4894,28 +4828,36 @@
                     (const_string "neon_vld3_vld4")))]
 )
 
+(define_expand "vec_load_lanesxi<mode>"
+  [(match_operand:XI 0 "s_register_operand")
+   (match_operand:XI 1 "neon_struct_operand")
+   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_NEON"
+{
+  emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 (define_expand "neon_vld4<mode>"
-  [(match_operand:XI 0 "s_register_operand" "=w")
-   (match_operand:SI 1 "s_register_operand" "+r")
+  [(match_operand:XI 0 "s_register_operand")
+   (match_operand:XI 1 "neon_struct_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
-  emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[0],
-                                    operands[1], operands[1]));
-  emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0],
-                                    operands[1], operands[1]));
+  rtx mem;
+
+  mem = adjust_address (operands[1], OImode, 0);
+  emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
+  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
+  emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
   DONE;
 })
 
 (define_insn "neon_vld4qa<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=w")
-        (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2"))
-                    (match_operand:XI 1 "s_register_operand" "0")
+        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VLD4A))
-   (set (match_operand:SI 2 "s_register_operand" "=r")
-        (plus:SI (match_dup 3)
-		 (const_int 32)))]
+                   UNSPEC_VLD4A))]
   "TARGET_NEON"
 {
   int regno = REGNO (operands[0]);
@@ -4924,8 +4866,8 @@
   ops[1] = gen_rtx_REG (DImode, regno + 4);
   ops[2] = gen_rtx_REG (DImode, regno + 8);
   ops[3] = gen_rtx_REG (DImode, regno + 12);
-  ops[4] = operands[2];
-  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
+  ops[4] = operands[1];
+  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vld3_vld4")]
@@ -4933,13 +4875,10 @@
 
 (define_insn "neon_vld4qb<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=w")
-        (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2"))
-                    (match_operand:XI 1 "s_register_operand" "0")
+        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
+                    (match_operand:XI 2 "s_register_operand" "0")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VLD4B))
-   (set (match_operand:SI 2 "s_register_operand" "=r")
-        (plus:SI (match_dup 3)
-		 (const_int 32)))]
+                   UNSPEC_VLD4B))]
   "TARGET_NEON"
 {
   int regno = REGNO (operands[0]);
@@ -4948,8 +4887,8 @@
   ops[1] = gen_rtx_REG (DImode, regno + 6);
   ops[2] = gen_rtx_REG (DImode, regno + 10);
   ops[3] = gen_rtx_REG (DImode, regno + 14);
-  ops[4] = operands[2];
-  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
+  ops[4] = operands[1];
+  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vld3_vld4")]
@@ -4957,7 +4896,7 @@
 
 (define_insn "neon_vld4_lane<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
-        (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:OI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -4976,7 +4915,7 @@
   ops[3] = gen_rtx_REG (DImode, regno + 6);
   ops[4] = operands[1];
   ops[5] = operands[3];
-  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]",
+  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                    ops);
   return "";
 }
@@ -4985,7 +4924,7 @@
 
 (define_insn "neon_vld4_lane<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=w")
-        (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:XI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
                     (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
@@ -5009,7 +4948,7 @@
   ops[3] = gen_rtx_REG (DImode, regno + 12);
   ops[4] = operands[1];
   ops[5] = GEN_INT (lane);
-  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]",
+  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                    ops);
   return "";
 }
@@ -5018,7 +4957,7 @@
 
 (define_insn "neon_vld4_dup<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=w")
-        (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r"))
+        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4_DUP))]
   "TARGET_NEON"
@@ -5032,12 +4971,12 @@
       ops[2] = gen_rtx_REG (DImode, regno + 4);
       ops[3] = gen_rtx_REG (DImode, regno + 6);
       ops[4] = operands[1];
-      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]",
+      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                        ops);
       return "";
     }
   else
-    return "vld1.<V_sz_elem>\t%h0, [%1]";
+    return "vld1.<V_sz_elem>\t%h0, %A1";
 }
   [(set (attr "neon_type")
       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
@@ -5045,17 +4984,24 @@
                     (const_string "neon_vld1_1_2_regs")))]
 )
 
+(define_expand "vec_store_lanesoi<mode>"
+  [(set (match_operand:OI 0 "neon_struct_operand")
+	(unspec:OI [(match_operand:OI 1 "s_register_operand")
+                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                   UNSPEC_VST4))]
+  "TARGET_NEON")
+
 (define_insn "neon_vst4<mode>"
-  [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST4))]
   "TARGET_NEON"
 {
   if (<V_sz_elem> == 64)
-    return "vst1.64\t%h1, [%0]";
+    return "vst1.64\t%h1, %A0";
   else
-    return "vst4.<V_sz_elem>\t%h1, [%0]";
+    return "vst4.<V_sz_elem>\t%h1, %A0";
 }
   [(set (attr "neon_type")
       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
@@ -5063,65 +5009,73 @@
                     (const_string "neon_vst2_4_regs_vst3_vst4")))]
 )
 
+(define_expand "vec_store_lanesxi<mode>"
+  [(match_operand:XI 0 "neon_struct_operand")
+   (match_operand:XI 1 "s_register_operand")
+   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_NEON"
+{
+  emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 (define_expand "neon_vst4<mode>"
-  [(match_operand:SI 0 "s_register_operand" "+r")
-   (match_operand:XI 1 "s_register_operand" "w")
+  [(match_operand:XI 0 "neon_struct_operand")
+   (match_operand:XI 1 "s_register_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
-  emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1]));
-  emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1]));
+  rtx mem;
+
+  mem = adjust_address (operands[0], OImode, 0);
+  emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
+  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
+  emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
   DONE;
 })
 
 (define_insn "neon_vst4qa<mode>"
-  [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
-        (unspec:OI [(match_operand:XI 2 "s_register_operand" "w")
+  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
+        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VST4A))
-   (set (match_operand:SI 0 "s_register_operand" "=r")
-        (plus:SI (match_dup 1)
-		 (const_int 32)))]
+                   UNSPEC_VST4A))]
   "TARGET_NEON"
 {
-  int regno = REGNO (operands[2]);
+  int regno = REGNO (operands[1]);
   rtx ops[5];
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (DImode, regno);
   ops[2] = gen_rtx_REG (DImode, regno + 4);
   ops[3] = gen_rtx_REG (DImode, regno + 8);
   ops[4] = gen_rtx_REG (DImode, regno + 12);
-  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
+  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
 )
 
 (define_insn "neon_vst4qb<mode>"
-  [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
-        (unspec:OI [(match_operand:XI 2 "s_register_operand" "w")
+  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
+        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-                   UNSPEC_VST4B))
-   (set (match_operand:SI 0 "s_register_operand" "=r")
-        (plus:SI (match_dup 1)
-		 (const_int 32)))]
+                   UNSPEC_VST4B))]
   "TARGET_NEON"
 {
-  int regno = REGNO (operands[2]);
+  int regno = REGNO (operands[1]);
   rtx ops[5];
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (DImode, regno + 2);
   ops[2] = gen_rtx_REG (DImode, regno + 6);
   ops[3] = gen_rtx_REG (DImode, regno + 10);
   ops[4] = gen_rtx_REG (DImode, regno + 14);
-  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
+  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
   return "";
 }
   [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
 )
 
 (define_insn "neon_vst4_lane<mode>"
-  [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
         (unspec:<V_four_elem>
            [(match_operand:OI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
@@ -5141,7 +5095,7 @@
   ops[3] = gen_rtx_REG (DImode, regno + 4);
   ops[4] = gen_rtx_REG (DImode, regno + 6);
   ops[5] = operands[2];
-  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]",
+  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                    ops);
   return "";
 }
@@ -5149,7 +5103,7 @@
 )
 
 (define_insn "neon_vst4_lane<mode>"
-  [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
+  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
         (unspec:<V_four_elem>
            [(match_operand:XI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
@@ -5174,7 +5128,7 @@
   ops[3] = gen_rtx_REG (DImode, regno + 8);
   ops[4] = gen_rtx_REG (DImode, regno + 12);
   ops[5] = GEN_INT (lane);
-  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]",
+  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                    ops);
   return "";
 }
@@ -5362,6 +5316,44 @@
  }
 )
 
+(define_insn "neon_vec_<US>shiftl_<mode>"
+ [(set (match_operand:<V_widen> 0 "register_operand" "=w")
+       (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
+       (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
+  "TARGET_NEON"
+{
+  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
+}
+  [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
+  [(match_operand:<V_unpack> 0 "register_operand" "")
+   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ {
+  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
+		simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
+		operands[2]));
+   DONE;
+ }
+)
+
+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
+  [(match_operand:<V_unpack> 0 "register_operand" "")
+   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ {
+  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
+                simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+				     GET_MODE_SIZE (<V_HALF>mode)),
+                operands[2]));
+   DONE;
+ }
+)
+
 ;; Vectorize for non-neon-quad case
 (define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
@@ -5438,6 +5430,34 @@
  }
 )
 
+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+ {
+   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
+   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
+
+   DONE;
+ }
+)
+
+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
+  [(match_operand:<V_double_width> 0 "register_operand" "")
+   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
+   (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+ {
+   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
+   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
+
+   DONE;
+ }
+)
+
 ;; The case when using all quad registers.
 (define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
@@ -5474,3 +5494,32 @@
   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
   DONE;
 })
+
+(define_insn "neon_vabd<mode>_2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+       (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+                           (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+       (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                   (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                     (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabd<mode>_3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+       (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+                             (match_operand:VDQ 2 "s_register_operand" "w")]
+                 UNSPEC_VSUB)))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+       (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+                     (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+                                   (const_string "neon_fp_vadd_ddd_vabs_dd")
+                                   (const_string "neon_fp_vadd_qqq_vabs_qq"))
+                     (const_string "neon_int_5")))]
+)
--- a/src/gcc/config/arm/neon-testgen.ml
+++ b/src/gcc/config/arm/neon-testgen.ml
@@ -177,7 +177,7 @@
       let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in
         "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}"
     | (PtrTo elt | CstPtrTo elt) ->
-      "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]"
+      "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]"
     | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
     | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
     | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]"
--- a/src/gcc/config/arm/predicates.md
+++ b/src/gcc/config/arm/predicates.md
@@ -129,11 +129,18 @@
   (ior (match_operand 0 "arm_rhs_operand")
        (match_operand 0 "memory_operand")))
 
+;; This doesn't have to do much because the constant is already checked
+;; in the shift_operator predicate.
 (define_predicate "shift_amount_operand"
   (ior (and (match_test "TARGET_ARM")
 	    (match_operand 0 "s_register_operand"))
        (match_operand 0 "const_int_operand")))
 
+(define_predicate "const_neon_scalar_shift_amount_operand"
+  (and (match_code "const_int")
+       (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode)
+	&& ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0")))
+
 (define_predicate "arm_add_operand"
   (ior (match_operand 0 "arm_rhs_operand")
        (match_operand 0 "arm_neg_immediate_operand")))
@@ -218,13 +225,29 @@
        (match_test "mode == GET_MODE (op)")))
 
 ;; True for shift operators.
+;; Notes:
+;;  * mult is only permitted with a constant shift amount
+;;  * patterns that permit register shift amounts only in ARM mode use
+;;    shift_amount_operand, patterns that always allow registers do not,
+;;    so we don't have to worry about that sort of thing here.
 (define_special_predicate "shift_operator"
   (and (ior (ior (and (match_code "mult")
 		      (match_test "power_of_two_operand (XEXP (op, 1), mode)"))
 		 (and (match_code "rotate")
 		      (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
 				   && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
-	    (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
+	    (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
+		 (match_test "GET_CODE (XEXP (op, 1)) != CONST_INT
+			      || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
+       (match_test "mode == GET_MODE (op)")))
+
+;; True for shift operators which can be used with saturation instructions.
+(define_special_predicate "sat_shift_operator"
+  (and (ior (and (match_code "mult")
+                 (match_test "power_of_two_operand (XEXP (op, 1), mode)"))
+            (and (match_code "ashift,ashiftrt")
+                 (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
+                             && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1)) < 32)")))
        (match_test "mode == GET_MODE (op)")))
 
 ;; True for MULT, to identify which variant of shift_operator is in use.
@@ -241,11 +264,15 @@
 
 ;; True for integer comparisons and, if FP is active, for comparisons
 ;; other than LTGT or UNEQ.
+(define_special_predicate "expandable_comparison_operator"
+  (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,
+	       unordered,ordered,unlt,unle,unge,ungt"))
+
+;; Likewise, but only accept comparisons that are directly supported
+;; by ARM condition codes.
 (define_special_predicate "arm_comparison_operator"
-  (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
-       (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
-			 && (TARGET_FPA || TARGET_VFP)")
-            (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
+  (and (match_operand 0 "expandable_comparison_operator")
+       (match_test "maybe_get_arm_condition_code (op) != ARM_NV")))
 
 (define_special_predicate "lt_ge_comparison_operator"
   (match_code "lt,ge"))
@@ -289,8 +316,11 @@
 
 (define_special_predicate "arm_extendqisi_mem_op"
   (and (match_operand 0 "memory_operand")
-       (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0),
-						    SIGN_EXTEND, 0)")))
+       (match_test "TARGET_ARM ? arm_legitimate_address_outer_p (mode,
+                                                                 XEXP (op, 0),
+						                 SIGN_EXTEND,
+								 0)
+                               : memory_address_p (QImode, XEXP (op, 0))")))
 
 (define_special_predicate "arm_reg_or_extendqisi_mem_op"
   (ior (match_operand 0 "arm_extendqisi_mem_op")
@@ -585,6 +615,26 @@
   return neon_immediate_valid_for_move (op, mode, NULL, NULL);
 })
 
+(define_predicate "imm_for_neon_lshift_operand"
+  (match_code "const_vector")
+{
+  return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true);
+})
+
+(define_predicate "imm_for_neon_rshift_operand"
+  (match_code "const_vector")
+{
+  return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false);
+})
+
+(define_predicate "imm_lshift_or_reg_neon"
+  (ior (match_operand 0 "s_register_operand")
+       (match_operand 0 "imm_for_neon_lshift_operand")))
+
+(define_predicate "imm_rshift_or_reg_neon"
+  (ior (match_operand 0 "s_register_operand")
+       (match_operand 0 "imm_for_neon_rshift_operand")))
+
 (define_predicate "imm_for_neon_logic_operand"
   (match_code "const_vector")
 {
@@ -684,5 +734,14 @@
   return true; 
 })
 
+(define_predicate "const_double_vcvt_power_of_two_reciprocal"
+  (and (match_code "const_double")
+       (match_test "TARGET_32BIT && TARGET_VFP 
+       		    && vfp3_const_double_for_fract_bits (op)")))
+
+(define_special_predicate "neon_struct_operand"
+  (and (match_code "mem")
+       (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
+
 (define_special_predicate "add_operator"
 			 (match_code "plus"))
--- a/src/gcc/config/arm/semi.h
+++ b/src/gcc/config/arm/semi.h
@@ -65,8 +65,7 @@
 #define ASM_SPEC "\
 %{fpic|fpie: -k} %{fPIC|fPIE: -k} \
 %{mbig-endian:-EB} \
-%{mcpu=*:-mcpu=%*} \
-%{march=*:-march=%*} \
+%(arm_cpu_spec) \
 %{mapcs-float:-mfloat} \
 %{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \
 %{mfloat-abi=*} %{mfpu=*} \
--- a/src/gcc/config/arm/sync.md
+++ b/src/gcc/config/arm/sync.md
@@ -1,6 +1,7 @@
 ;; Machine description for ARM processor synchronization primitives.
 ;; Copyright (C) 2010 Free Software Foundation, Inc.
 ;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com)
+;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org)
 ;;
 ;; This file is part of GCC.
 ;;
@@ -33,31 +34,24 @@
   MEM_VOLATILE_P (operands[0]) = 1;
 })
 
-(define_expand "sync_compare_and_swapsi"
-  [(set (match_operand:SI 0 "s_register_operand")
-        (unspec_volatile:SI [(match_operand:SI 1 "memory_operand")
-			     (match_operand:SI 2 "s_register_operand")
-			     (match_operand:SI 3 "s_register_operand")]
-			     VUNSPEC_SYNC_COMPARE_AND_SWAP))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omrn;
-    generator.u.omrn = gen_arm_sync_compare_and_swapsi;
-    arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2],
-                     operands[3]);
-    DONE;
-  })
 
-(define_mode_iterator NARROW [QI HI])
+(define_mode_attr sync_predtab [(SI "TARGET_HAVE_LDREX &&
+					TARGET_HAVE_MEMORY_BARRIER")
+				(QI "TARGET_HAVE_LDREXBH &&
+					TARGET_HAVE_MEMORY_BARRIER")
+				(HI "TARGET_HAVE_LDREXBH &&
+					TARGET_HAVE_MEMORY_BARRIER")
+				(DI "TARGET_HAVE_LDREXD &&
+					ARM_DOUBLEWORD_ALIGN &&
+					TARGET_HAVE_MEMORY_BARRIER")])
 
 (define_expand "sync_compare_and_swap<mode>"
-  [(set (match_operand:NARROW 0 "s_register_operand")
-        (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand")
-			     (match_operand:NARROW 2 "s_register_operand")
-			     (match_operand:NARROW 3 "s_register_operand")]
+  [(set (match_operand:QHSD 0 "s_register_operand")
+        (unspec_volatile:QHSD [(match_operand:QHSD 1 "memory_operand")
+			     (match_operand:QHSD 2 "s_register_operand")
+			     (match_operand:QHSD 3 "s_register_operand")]
 			     VUNSPEC_SYNC_COMPARE_AND_SWAP))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omrn;
@@ -67,25 +61,11 @@
     DONE;
   })
 
-(define_expand "sync_lock_test_and_setsi"
-  [(match_operand:SI 0 "s_register_operand")
-   (match_operand:SI 1 "memory_operand")
-   (match_operand:SI 2 "s_register_operand")]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_lock_test_and_setsi;
-    arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
-                     operands[2]);
-    DONE;
-  })
-
 (define_expand "sync_lock_test_and_set<mode>"
-  [(match_operand:NARROW 0 "s_register_operand")
-   (match_operand:NARROW 1 "memory_operand")
-   (match_operand:NARROW 2 "s_register_operand")]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "s_register_operand")
+   (match_operand:QHSD 1 "memory_operand")
+   (match_operand:QHSD 2 "s_register_operand")]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
@@ -115,51 +95,25 @@
 				(plus "*")
 				(minus "*")])
 
-(define_expand "sync_<sync_optab>si"
-  [(match_operand:SI 0 "memory_operand")
-   (match_operand:SI 1 "s_register_operand")
-   (syncop:SI (match_dup 0) (match_dup 1))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_<sync_optab>si;
-    arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]);
-    DONE;
-  })
-
-(define_expand "sync_nandsi"
-  [(match_operand:SI 0 "memory_operand")
-   (match_operand:SI 1 "s_register_operand")
-   (not:SI (and:SI (match_dup 0) (match_dup 1)))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_nandsi;
-    arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]);
-    DONE;
-  })
-
 (define_expand "sync_<sync_optab><mode>"
-  [(match_operand:NARROW 0 "memory_operand")
-   (match_operand:NARROW 1 "s_register_operand")
-   (syncop:NARROW (match_dup 0) (match_dup 1))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "memory_operand")
+   (match_operand:QHSD 1 "s_register_operand")
+   (syncop:QHSD (match_dup 0) (match_dup 1))]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
     generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
     arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
-    		     operands[1]);
+		     operands[1]);
     DONE;
   })
 
 (define_expand "sync_nand<mode>"
-  [(match_operand:NARROW 0 "memory_operand")
-   (match_operand:NARROW 1 "s_register_operand")
-   (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "memory_operand")
+   (match_operand:QHSD 1 "s_register_operand")
+   (not:QHSD (and:QHSD (match_dup 0) (match_dup 1)))]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
@@ -169,57 +123,27 @@
     DONE;
   })
 
-(define_expand "sync_new_<sync_optab>si"
-  [(match_operand:SI 0 "s_register_operand")
-   (match_operand:SI 1 "memory_operand")
-   (match_operand:SI 2 "s_register_operand")
-   (syncop:SI (match_dup 1) (match_dup 2))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_<sync_optab>si;
-    arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
-                     operands[2]);
-    DONE;
-  })
-
-(define_expand "sync_new_nandsi"
-  [(match_operand:SI 0 "s_register_operand")
-   (match_operand:SI 1 "memory_operand")
-   (match_operand:SI 2 "s_register_operand")
-   (not:SI (and:SI (match_dup 1) (match_dup 2)))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_new_nandsi;
-    arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
-    		     operands[2]);
-    DONE;
-  })
-
 (define_expand "sync_new_<sync_optab><mode>"
-  [(match_operand:NARROW 0 "s_register_operand")
-   (match_operand:NARROW 1 "memory_operand")
-   (match_operand:NARROW 2 "s_register_operand")
-   (syncop:NARROW (match_dup 1) (match_dup 2))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "s_register_operand")
+   (match_operand:QHSD 1 "memory_operand")
+   (match_operand:QHSD 2 "s_register_operand")
+   (syncop:QHSD (match_dup 1) (match_dup 2))]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
     generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
     arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    		     NULL, operands[2]);
+		     NULL, operands[2]);
     DONE;
   })
 
 (define_expand "sync_new_nand<mode>"
-  [(match_operand:NARROW 0 "s_register_operand")
-   (match_operand:NARROW 1 "memory_operand")
-   (match_operand:NARROW 2 "s_register_operand")
-   (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "s_register_operand")
+   (match_operand:QHSD 1 "memory_operand")
+   (match_operand:QHSD 2 "s_register_operand")
+   (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
@@ -229,57 +153,27 @@
     DONE;
   });
 
-(define_expand "sync_old_<sync_optab>si"
-  [(match_operand:SI 0 "s_register_operand")
-   (match_operand:SI 1 "memory_operand")
-   (match_operand:SI 2 "s_register_operand")
-   (syncop:SI (match_dup 1) (match_dup 2))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_old_<sync_optab>si;
-    arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
-                     operands[2]);
-    DONE;
-  })
-
-(define_expand "sync_old_nandsi"
-  [(match_operand:SI 0 "s_register_operand")
-   (match_operand:SI 1 "memory_operand")
-   (match_operand:SI 2 "s_register_operand")
-   (not:SI (and:SI (match_dup 1) (match_dup 2)))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
-  {
-    struct arm_sync_generator generator;
-    generator.op = arm_sync_generator_omn;
-    generator.u.omn = gen_arm_sync_old_nandsi;
-    arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
-                     operands[2]);
-    DONE;
-  })
-
 (define_expand "sync_old_<sync_optab><mode>"
-  [(match_operand:NARROW 0 "s_register_operand")
-   (match_operand:NARROW 1 "memory_operand")
-   (match_operand:NARROW 2 "s_register_operand")
-   (syncop:NARROW (match_dup 1) (match_dup 2))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "s_register_operand")
+   (match_operand:QHSD 1 "memory_operand")
+   (match_operand:QHSD 2 "s_register_operand")
+   (syncop:QHSD (match_dup 1) (match_dup 2))]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
     generator.u.omn = gen_arm_sync_old_<sync_optab><mode>;
     arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
-    		     NULL, operands[2]);
+		     NULL, operands[2]);
     DONE;
   })
 
 (define_expand "sync_old_nand<mode>"
-  [(match_operand:NARROW 0 "s_register_operand")
-   (match_operand:NARROW 1 "memory_operand")
-   (match_operand:NARROW 2 "s_register_operand")
-   (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  [(match_operand:QHSD 0 "s_register_operand")
+   (match_operand:QHSD 1 "memory_operand")
+   (match_operand:QHSD 2 "s_register_operand")
+   (not:QHSD (and:QHSD (match_dup 1) (match_dup 2)))]
+  "<sync_predtab>"
   {
     struct arm_sync_generator generator;
     generator.op = arm_sync_generator_omn;
@@ -289,22 +183,22 @@
     DONE;
   })
 
-(define_insn "arm_sync_compare_and_swapsi"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI
-	  [(match_operand:SI 1 "arm_sync_memory_operand" "+Q")
-   	   (match_operand:SI 2 "s_register_operand" "r")
-	   (match_operand:SI 3 "s_register_operand" "r")]
-	  VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)]
+(define_insn "arm_sync_compare_and_swap<mode>"
+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
+        (unspec_volatile:SIDI
+	 [(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
+	  (match_operand:SIDI 2 "s_register_operand" "r")
+	  (match_operand:SIDI 3 "s_register_operand" "r")]
+	 VUNSPEC_SYNC_COMPARE_AND_SWAP))
+   (set (match_dup 1) (unspec_volatile:SIDI [(match_dup 2)]
                                           VUNSPEC_SYNC_COMPARE_AND_SWAP))
    (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
                                                 VUNSPEC_SYNC_COMPARE_AND_SWAP))
    ]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
    (set_attr "sync_required_value"  "2")
@@ -318,7 +212,7 @@
         (zero_extend:SI
 	  (unspec_volatile:NARROW
 	    [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")
-   	     (match_operand:SI 2 "s_register_operand" "r")
+	     (match_operand:SI 2 "s_register_operand" "r")
 	     (match_operand:SI 3 "s_register_operand" "r")]
 	    VUNSPEC_SYNC_COMPARE_AND_SWAP)))
    (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)]
@@ -326,10 +220,10 @@
    (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
                                                 VUNSPEC_SYNC_COMPARE_AND_SWAP))
    ]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
    (set_attr "sync_required_value"  "2")
@@ -338,18 +232,18 @@
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_lock_test_and_setsi"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (match_operand:SI 1 "arm_sync_memory_operand" "+Q"))
+(define_insn "arm_sync_lock_test_and_set<mode>"
+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
+	(match_operand:SIDI 1 "arm_sync_memory_operand" "+Q"))
    (set (match_dup 1)
-        (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")]
-	                    VUNSPEC_SYNC_LOCK))
+	(unspec_volatile:SIDI [(match_operand:SIDI 2 "s_register_operand" "r")]
+	VUNSPEC_SYNC_LOCK))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_release_barrier" "no")
    (set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
@@ -364,10 +258,10 @@
         (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")))
    (set (match_dup 1)
         (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")]
-	                        VUNSPEC_SYNC_LOCK))
+				VUNSPEC_SYNC_LOCK))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
   } 
@@ -380,22 +274,22 @@
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_new_<sync_optab>si"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(syncop:SI
-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
-                               (match_operand:SI 2 "s_register_operand" "r"))
-	                    ]
-	                    VUNSPEC_SYNC_NEW_OP))
+(define_insn "arm_sync_new_<sync_optab><mode>"
+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
+        (unspec_volatile:SIDI [(syncop:SIDI
+			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
+			       (match_operand:SIDI 2 "s_register_operand" "r"))
+			    ]
+			    VUNSPEC_SYNC_NEW_OP))
    (set (match_dup 1)
-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
-	                    VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
+			    VUNSPEC_SYNC_NEW_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
@@ -405,54 +299,54 @@
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_new_nandsi"
+(define_insn "arm_sync_new_<sync_optab><mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(not:SI (and:SI
-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
-                               (match_operand:SI 2 "s_register_operand" "r")))
-	                    ]
-	                    VUNSPEC_SYNC_NEW_OP))
+        (unspec_volatile:SI [(syncop:SI
+			       (zero_extend:SI
+				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+			       (match_operand:SI 2 "s_register_operand" "r"))
+			    ]
+			    VUNSPEC_SYNC_NEW_OP))
    (set (match_dup 1)
-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
-	                    VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+				VUNSPEC_SYNC_NEW_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
    (set_attr "sync_t1"              "0")
    (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "nand")
+   (set_attr "sync_op"              "<sync_optab>")
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_new_<sync_optab><mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(syncop:SI
-                               (zero_extend:SI
-			         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-                               (match_operand:SI 2 "s_register_operand" "r"))
-	                    ]
-	                    VUNSPEC_SYNC_NEW_OP))
+(define_insn "arm_sync_new_nand<mode>"
+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
+        (unspec_volatile:SIDI [(not:SIDI (and:SIDI
+			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
+			       (match_operand:SIDI 2 "s_register_operand" "r")))
+			    ]
+			    VUNSPEC_SYNC_NEW_OP))
    (set (match_dup 1)
-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-	                        VUNSPEC_SYNC_NEW_OP))
+	(unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
+			    VUNSPEC_SYNC_NEW_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
    (set_attr "sync_t1"              "0")
    (set_attr "sync_t2"              "3")
-   (set_attr "sync_op"              "<sync_optab>")
+   (set_attr "sync_op"              "nand")
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
@@ -461,19 +355,19 @@
         (unspec_volatile:SI
 	  [(not:SI
 	     (and:SI
-               (zero_extend:SI	  
-	         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-               (match_operand:SI 2 "s_register_operand" "r")))
+	       (zero_extend:SI
+		 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+	       (match_operand:SI 2 "s_register_operand" "r")))
 	  ] VUNSPEC_SYNC_NEW_OP))
    (set (match_dup 1)
         (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-	                        VUNSPEC_SYNC_NEW_OP))
+				VUNSPEC_SYNC_NEW_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
-  } 
+  }
   [(set_attr "sync_result"          "0")
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
@@ -483,20 +377,20 @@
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_old_<sync_optab>si"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(syncop:SI
-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
-                               (match_operand:SI 2 "s_register_operand" "r"))
-	                    ]
-	                    VUNSPEC_SYNC_OLD_OP))
+(define_insn "arm_sync_old_<sync_optab><mode>"
+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
+	(unspec_volatile:SIDI [(syncop:SIDI
+			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
+			       (match_operand:SIDI 2 "s_register_operand" "r"))
+			    ]
+			    VUNSPEC_SYNC_OLD_OP))
    (set (match_dup 1)
-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
-	                    VUNSPEC_SYNC_OLD_OP))
+        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
+			      VUNSPEC_SYNC_OLD_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))
+   (clobber (match_scratch:SIDI 3 "=&r"))
    (clobber (match_scratch:SI 4 "<sync_clobber>"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
   } 
@@ -509,20 +403,21 @@
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_old_nandsi"
+(define_insn "arm_sync_old_<sync_optab><mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(not:SI (and:SI
-                               (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
-                               (match_operand:SI 2 "s_register_operand" "r")))
-	                    ]
-	                    VUNSPEC_SYNC_OLD_OP))
+        (unspec_volatile:SI [(syncop:SI
+			       (zero_extend:SI
+				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+			       (match_operand:SI 2 "s_register_operand" "r"))
+			    ]
+			    VUNSPEC_SYNC_OLD_OP))
    (set (match_dup 1)
-        (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
-	                    VUNSPEC_SYNC_OLD_OP))
+	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+			    VUNSPEC_SYNC_OLD_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
-  "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
   } 
@@ -530,26 +425,25 @@
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
    (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
-   (set_attr "sync_op"              "nand")
+   (set_attr "sync_t2"              "<sync_t2_reqd>")
+   (set_attr "sync_op"              "<sync_optab>")
    (set_attr "conds" 		    "clob")
    (set_attr "predicable" "no")])
 
-(define_insn "arm_sync_old_<sync_optab><mode>"
-  [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(syncop:SI
-                               (zero_extend:SI
-			         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-                               (match_operand:SI 2 "s_register_operand" "r"))
-	                    ]
-	                    VUNSPEC_SYNC_OLD_OP))
+(define_insn "arm_sync_old_nand<mode>"
+  [(set (match_operand:SIDI 0 "s_register_operand" "=&r")
+	(unspec_volatile:SIDI [(not:SIDI (and:SIDI
+			       (match_operand:SIDI 1 "arm_sync_memory_operand" "+Q")
+			       (match_operand:SIDI 2 "s_register_operand" "r")))
+			    ]
+			    VUNSPEC_SYNC_OLD_OP))
    (set (match_dup 1)
-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+        (unspec_volatile:SIDI [(match_dup 1) (match_dup 2)]
 	                    VUNSPEC_SYNC_OLD_OP))
    (clobber (reg:CC CC_REGNUM))
-   (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+   (clobber (match_scratch:SIDI 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
   } 
@@ -557,26 +451,26 @@
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
    (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "<sync_t2_reqd>")
-   (set_attr "sync_op"              "<sync_optab>")
+   (set_attr "sync_t2"              "4")
+   (set_attr "sync_op"              "nand")
    (set_attr "conds" 		    "clob")
    (set_attr "predicable" "no")])
 
 (define_insn "arm_sync_old_nand<mode>"
   [(set (match_operand:SI 0 "s_register_operand" "=&r")
-        (unspec_volatile:SI [(not:SI (and:SI
-                               (zero_extend:SI
-			         (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
-                               (match_operand:SI 2 "s_register_operand" "r")))
-	                    ]
-	                    VUNSPEC_SYNC_OLD_OP))
+	(unspec_volatile:SI [(not:SI (and:SI
+			       (zero_extend:SI
+				 (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+			       (match_operand:SI 2 "s_register_operand" "r")))
+			    ]
+			    VUNSPEC_SYNC_OLD_OP))
    (set (match_dup 1)
-        (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
-	                    VUNSPEC_SYNC_OLD_OP))
+	(unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+			    VUNSPEC_SYNC_OLD_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))
    (clobber (match_scratch:SI 4 "=&r"))]
-  "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+  "<sync_predtab>"
   {
     return arm_output_sync_insn (insn, operands);
   } 
@@ -600,3 +494,36 @@
    (set_attr "conds" "unconditional")
    (set_attr "predicable" "no")])
 
+(define_expand "sync_lock_releasedi"
+ [(match_operand:DI 0 "memory_operand")
+  (match_operand:DI 1 "s_register_operand")]
+ "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER"
+ { 
+   struct arm_sync_generator generator;
+   rtx tmp1 = gen_reg_rtx (DImode);
+   generator.op = arm_sync_generator_omn;
+   generator.u.omn = gen_arm_sync_lock_releasedi;
+   arm_expand_sync (DImode, &generator, operands[1], operands[0], NULL, tmp1);
+   DONE;
+ }
+)
+
+(define_insn "arm_sync_lock_releasedi"
+ [(set (match_operand:DI 2 "s_register_operand" "=&r")
+       (unspec_volatile:DI [(match_operand:DI 1 "arm_sync_memory_operand" "+Q")
+       			    (match_operand:DI 0 "s_register_operand" "r")]
+			    VUNSPEC_SYNC_RELEASE))
+  (clobber (reg:CC CC_REGNUM))
+  (clobber (match_scratch:SI 3 "=&r"))]
+  "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN && TARGET_HAVE_MEMORY_BARRIER"
+ {
+  return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_memory"          "1")
+  (set_attr "sync_result" 	   "2")
+  (set_attr "sync_t1" 		   "2")
+  (set_attr "sync_t2" 		   "3")
+  (set_attr "sync_new_value" 	   "0")
+  (set_attr "conds"             "clob")
+  (set_attr "predicable"          "no")]
+)
--- a/src/gcc/config/arm/t-arm
+++ b/src/gcc/config/arm/t-arm
@@ -31,6 +31,16 @@
 		$(srcdir)/config/arm/fmp626.md \
 		$(srcdir)/config/arm/fa726te.md \
 		$(srcdir)/config/arm/arm926ejs.md \
+		$(srcdir)/config/arm/cortex-a15.md \
+		$(srcdir)/config/arm/cortex-a5.md \
+		$(srcdir)/config/arm/cortex-a8.md \
+		$(srcdir)/config/arm/cortex-a8-neon.md \
+		$(srcdir)/config/arm/cortex-a9.md \
+		$(srcdir)/config/arm/cortex-a9-neon.md \
+		$(srcdir)/config/arm/cortex-m4-fpu.md \
+		$(srcdir)/config/arm/cortex-m4.md \
+		$(srcdir)/config/arm/cortex-r4f.md \
+		$(srcdir)/config/arm/cortex-r4.md \
 		$(srcdir)/config/arm/cirrus.md \
 		$(srcdir)/config/arm/fpa.md \
 		$(srcdir)/config/arm/vec-common.md \
--- a/src/gcc/config/arm/thumb2.md
+++ b/src/gcc/config/arm/thumb2.md
@@ -207,7 +207,9 @@
 (define_insn "*thumb2_movhi_insn"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
 	(match_operand:HI 1 "general_operand"      "rI,n,r,m"))]
-  "TARGET_THUMB2"
+  "TARGET_THUMB2
+  && (register_operand (operands[0], HImode)
+     || register_operand (operands[1], HImode))"
   "@
    mov%?\\t%0, %1\\t%@ movhi
    movw%?\\t%0, %L1\\t%@ movhi
@@ -780,26 +782,6 @@
    (set_attr "length" "2")]
 )
 
-(define_insn "divsi3"
-  [(set (match_operand:SI	  0 "s_register_operand" "=r")
-	(div:SI (match_operand:SI 1 "s_register_operand"  "r")
-		(match_operand:SI 2 "s_register_operand"  "r")))]
-  "TARGET_THUMB2 && arm_arch_hwdiv"
-  "sdiv%?\t%0, %1, %2"
-  [(set_attr "predicable" "yes")
-   (set_attr "insn" "sdiv")]
-)
-
-(define_insn "udivsi3"
-  [(set (match_operand:SI	   0 "s_register_operand" "=r")
-	(udiv:SI (match_operand:SI 1 "s_register_operand"  "r")
-		 (match_operand:SI 2 "s_register_operand"  "r")))]
-  "TARGET_THUMB2 && arm_arch_hwdiv"
-  "udiv%?\t%0, %1, %2"
-  [(set_attr "predicable" "yes")
-   (set_attr "insn" "udiv")]
-)
-
 (define_insn "*thumb2_subsi_short"
   [(set (match_operand:SI 0 "low_register_operand" "=l")
 	(minus:SI (match_operand:SI 1 "low_register_operand" "l")
@@ -837,7 +819,7 @@
   "operands[4] = GEN_INT (- INTVAL (operands[2]));"
 )
 
-(define_insn "*thumb2_addsi3_compare0"
+(define_insn "thumb2_addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
 	  (plus:SI (match_operand:SI 1 "s_register_operand" "l,  0, r")
@@ -1119,3 +1101,54 @@
   "
   operands[2] = GEN_INT (32 - INTVAL (operands[2]));
   ")
+
+;; Define the subtract-one-and-jump insns so loop.c
+;; knows what to generate.
+(define_expand "doloop_end"
+  [(use (match_operand 0 "" ""))      ; loop pseudo
+   (use (match_operand 1 "" ""))      ; iterations; zero if unknown
+   (use (match_operand 2 "" ""))      ; max iterations
+   (use (match_operand 3 "" ""))      ; loop level
+   (use (match_operand 4 "" ""))]     ; label
+  "TARGET_32BIT"
+  "
+ {
+   /* Currently SMS relies on the do-loop pattern to recognize loops
+      where (1) the control part consists of all insns defining and/or
+      using a certain 'count' register and (2) the loop count can be
+      adjusted by modifying this register prior to the loop.
+      ??? The possible introduction of a new block to initialize the
+      new IV can potentially affect branch optimizations.  */
+   if (optimize > 0 && flag_modulo_sched)
+   {
+     rtx s0;
+     rtx bcomp;
+     rtx loc_ref;
+     rtx cc_reg;
+     rtx insn;
+     rtx cmp;
+
+    /* Only use this on innermost loops.  */
+     if (INTVAL (operands[3]) > 1)
+       FAIL;
+     if (GET_MODE (operands[0]) != SImode)
+       FAIL;
+
+     s0 = operands [0];
+     if (TARGET_THUMB2)
+       insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+     else
+       insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1)));
+
+     cmp = XVECEXP (PATTERN (insn), 0, 0);
+     cc_reg = SET_DEST (cmp);
+     bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
+     loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
+     emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+                                  gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+                                                        loc_ref, pc_rtx)));
+     DONE;
+   }else
+      FAIL;
+}")
+
--- a/src/gcc/config/arm/t-linux-eabi
+++ b/src/gcc/config/arm/t-linux-eabi
@@ -41,3 +41,4 @@
 EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
 
 LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c
+LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c
--- a/src/gcc/config/arm/unwind-arm.c
+++ b/src/gcc/config/arm/unwind-arm.c
@@ -32,13 +32,18 @@
 typedef unsigned char bool;
 
 typedef struct _ZSt9type_info type_info; /* This names C++ type_info type */
+enum __cxa_type_match_result
+  {
+    ctm_failed = 0,
+    ctm_succeeded = 1,
+    ctm_succeeded_with_ptr_to_base = 2
+  };
 
 void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp);
 bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp);
-bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp,
-					    const type_info *rttip,
-					    bool is_reference,
-					    void **matched_object);
+enum __cxa_type_match_result __attribute__((weak)) __cxa_type_match
+  (_Unwind_Control_Block *ucbp, const type_info *rttip,
+   bool is_reference, void **matched_object);
 
 _Unwind_Ptr __attribute__((weak))
 __gnu_Unwind_Find_exidx (_Unwind_Ptr, int *);
@@ -1107,6 +1112,7 @@
 		      _uw rtti;
 		      bool is_reference = (data[0] & uint32_highbit) != 0;
 		      void *matched;
+		      enum __cxa_type_match_result match_type;
 
 		      /* Check for no-throw areas.  */
 		      if (data[1] == (_uw) -2)
@@ -1118,17 +1124,31 @@
 			{
 			  /* Match a catch specification.  */
 			  rtti = _Unwind_decode_target2 ((_uw) &data[1]);
-			  if (!__cxa_type_match (ucbp, (type_info *) rtti,
-						 is_reference,
-						 &matched))
-			    matched = (void *)0;
+			  match_type = __cxa_type_match (ucbp,
+							 (type_info *) rtti,
+							 is_reference,
+							 &matched);
 			}
+		      else
+			match_type = ctm_succeeded;
 
-		      if (matched)
+		      if (match_type)
 			{
 			  ucbp->barrier_cache.sp =
 			    _Unwind_GetGR (context, R_SP);
-			  ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
+			  // ctm_succeeded_with_ptr_to_base really
+			  // means _c_t_m indirected the pointer
+			  // object.  We have to reconstruct the
+			  // additional pointer layer by using a temporary.
+			  if (match_type == ctm_succeeded_with_ptr_to_base)
+			    {
+			      ucbp->barrier_cache.bitpattern[2]
+				= (_uw) matched;
+			      ucbp->barrier_cache.bitpattern[0]
+				= (_uw) &ucbp->barrier_cache.bitpattern[2];
+			    }
+			  else
+			    ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
 			  ucbp->barrier_cache.bitpattern[1] = (_uw) data;
 			  return _URC_HANDLER_FOUND;
 			}
@@ -1196,8 +1216,6 @@
 		  ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1];
 
 		  if (data[0] & uint32_highbit)
-		    phase2_call_unexpected_after_unwind = 1;
-		  else
 		    {
 		      data += rtti_count + 1;
 		      /* Setup for entry to the handler.  */
@@ -1207,6 +1225,8 @@
 		      _Unwind_SetGR (context, 0, (_uw) ucbp);
 		      return _URC_INSTALL_CONTEXT;
 		    }
+		  else
+		    phase2_call_unexpected_after_unwind = 1;
 		}
 	      if (data[0] & uint32_highbit)
 		data++;
--- a/src/gcc/config/arm/vfp.md
+++ b/src/gcc/config/arm/vfp.md
@@ -401,8 +401,8 @@
 ;; DFmode moves
 
 (define_insn "*movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w  ,Uv,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w  ,Uv,r, m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
@@ -418,9 +418,9 @@
 	gcc_assert (TARGET_VFP_DOUBLE);
         return \"fconstd%?\\t%P0, #%G1\";
       case 3: case 4:
-	return output_move_double (operands);
-      case 5: case 6:
 	return output_move_vfp (operands);
+      case 5: case 6:
+	return output_move_double (operands);
       case 7:
 	if (TARGET_VFP_SINGLE)
 	  return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
@@ -435,7 +435,7 @@
   "
   [(set_attr "type"
      "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
-   (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+   (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
 			       (eq_attr "alternative" "7")
 				(if_then_else
 				 (eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -449,8 +449,8 @@
 )
 
 (define_insn "*thumb2_movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w  ,Uv,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w  ,Uv,r ,m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,UvF,w, mF,r, w,r"))]
   "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
   "*
   {
@@ -463,10 +463,10 @@
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
 	return \"fconstd%?\\t%P0, #%G1\";
-      case 3: case 4: case 8:
-	return output_move_double (operands);
-      case 5: case 6:
+      case 3: case 4:
 	return output_move_vfp (operands);
+      case 5: case 6: case 8:
+	return output_move_double (operands);
       case 7:
 	if (TARGET_VFP_SINGLE)
 	  return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
@@ -478,8 +478,8 @@
     }
   "
   [(set_attr "type"
-     "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
-   (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+     "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
+   (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
 			       (eq_attr "alternative" "7")
 				(if_then_else
 				 (eq (symbol_ref "TARGET_VFP_SINGLE")
@@ -487,8 +487,8 @@
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
+   (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
 )
 
 
@@ -1131,9 +1131,40 @@
    (set_attr "type" "fcmpd")]
 )
 
+;; Fixed point to floating point conversions. 
+(define_code_iterator FCVT [unsigned_float float])
+(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")])
+
+(define_insn "*combine_vcvt_f32_<FCVTI32typename>"
+  [(set (match_operand:SF 0 "s_register_operand" "=t")
+	(mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0"))
+		 (match_operand 2 
+			"const_double_vcvt_power_of_two_reciprocal" "Dt")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math"
+  "vcvt.f32.<FCVTI32typename>\\t%0, %1, %v2"
+ [(set_attr "predicable" "no")
+  (set_attr "type" "f_cvt")]
+)
+
+;; Not the ideal way of implementing this. Ideally we would be able to split
+;; this into a move to a DP register and then a vcvt.f64.i32
+(define_insn "*combine_vcvt_f64_<FCVTI32typename>"
+  [(set (match_operand:DF 0 "s_register_operand" "=x,x,w")
+	(mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r"))
+		 (match_operand 2 
+		     "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math 
+  && !TARGET_VFP_SINGLE"
+  "@
+  vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2
+  vmov.f32\\t%0, %1\;vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2
+  vmov.f64\\t%P0, %1, %1\; vcvt.f64.<FCVTI32typename>\\t%P0, %P0, %v2"
+ [(set_attr "predicable" "no")
+  (set_attr "type" "f_cvt")
+  (set_attr "length" "8")]
+)
 
 ;; Store multiple insn used in function prologue.
-
 (define_insn "*push_multi_vfp"
   [(match_parallel 2 "multi_register_push"
     [(set (match_operand:BLK 0 "memory_operand" "=m")
--- a/src/gcc/config/arm/x-arm
+++ b/src/gcc/config/arm/x-arm
@@ -0,0 +1,3 @@
+driver-arm.o: $(srcdir)/config/arm/driver-arm.c \
+  $(CONFIG_H) $(SYSTEM_H)
+	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
--- a/src/gcc/config/host-linux.c
+++ b/src/gcc/config/host-linux.c
@@ -85,7 +85,7 @@
 #elif defined(__mc68000__)
 # define TRY_EMPTY_VM_SPACE	0x40000000
 #elif defined(__ARM_EABI__)
-# define TRY_EMPTY_VM_SPACE     0x60000000
+# define TRY_EMPTY_VM_SPACE	0x60000000
 #else
 # define TRY_EMPTY_VM_SPACE	0
 #endif
--- a/src/gcc/config/rs6000/rs6000.c
+++ b/src/gcc/config/rs6000/rs6000.c
@@ -5146,7 +5146,9 @@
   for (i = 0; i < n_elts; ++i)
     {
       x = XVECEXP (vals, 0, i);
-      if (!CONSTANT_P (x))
+      if (!(CONST_INT_P (x)
+	    || GET_CODE (x) == CONST_DOUBLE
+	    || GET_CODE (x) == CONST_FIXED))
 	++n_var;
     }
   if (n_var == 0)
@@ -5298,7 +5300,9 @@
   for (i = 0; i < n_elts; ++i)
     {
       x = XVECEXP (vals, 0, i);
-      if (!CONSTANT_P (x))
+      if (!(CONST_INT_P (x)
+	    || GET_CODE (x) == CONST_DOUBLE
+	    || GET_CODE (x) == CONST_FIXED))
 	++n_var, one_var = i;
       else if (x != CONST0_RTX (inner_mode))
 	all_const_zero = false;
--- a/src/gcc/config.gcc
+++ b/src/gcc/config.gcc
@@ -177,7 +177,12 @@
 #  configure_default_options
 #			Set to an initializer for configure_default_options
 #			in configargs.h, based on --with-cpu et cetera.
-
+#
+#  target_type_format_char 
+#  			The default character to be used for formatting
+#			the attribute in a
+#			.type symbol_name, ${t_t_f_c}<property>
+#			directive.
 # The following variables are used in each case-construct to build up the
 # outgoing variables:
 #
@@ -219,6 +224,7 @@
 target_gtfiles=
 need_64bit_hwint=
 need_64bit_isa=
+target_type_format_char='@'
 
 # Don't carry these over build->host->target.  Please.
 xm_file=
@@ -312,6 +318,7 @@
 arm*-*-*)
 	cpu_type=arm
 	extra_headers="mmintrin.h arm_neon.h"
+	target_type_format_char='%'
 	c_target_objs="arm-c.o"
 	cxx_target_objs="arm-c.o"
 	;;
--- a/src/gcc/config.host
+++ b/src/gcc/config.host
@@ -100,6 +100,14 @@
 esac
 
 case ${host} in
+  arm*-*-linux*)
+    case ${target} in
+      arm*-*-*)
+	host_extra_gcc_objs="driver-arm.o"
+	host_xmake_file="${host_xmake_file} arm/x-arm"
+	;;
+    esac
+    ;;
   alpha*-*-linux*)
     case ${target} in
       alpha*-*-linux*)
--- a/src/gcc/configure
+++ b/src/gcc/configure
@@ -1652,7 +1652,8 @@
                           use sysroot as the system root during the build
   --with-sysroot=DIR Search for usr/lib, usr/include, et al, within DIR.
   --with-specs=SPECS      add SPECS to driver command-line processing
-  --with-pkgversion=PKG   Use PKG in the version string in place of "GCC"
+  --with-pkgversion=PKG   Use PKG in the version string in place of "Linaro
+                          GCC `cat $srcdir/LINARO-VERSION`"
   --with-bugurl=URL       Direct users to URL to report a bug
   --with-multilib-list    Select multilibs (SH only)
   --with-gnu-ld           assume the C compiler uses GNU ld default=no
@@ -7161,7 +7162,7 @@
       *)   PKGVERSION="($withval) " ;;
      esac
 else
-  PKGVERSION="(GCC) "
+  PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
 
 fi
 
@@ -17527,7 +17528,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17530 "configure"
+#line 17531 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17633,7 +17634,7 @@
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17636 "configure"
+#line 17637 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -25259,7 +25260,7 @@
   then gcc_cv_as_gnu_unique_object=yes
 fi
   elif test x$gcc_cv_as != x; then
-    echo '.type foo, @gnu_unique_object' > conftest.s
+    echo '.type foo, '$target_type_format_char'gnu_unique_object' > conftest.s
     if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
   (eval $ac_try) 2>&5
@@ -25278,7 +25279,8 @@
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_gnu_unique_object" >&5
 $as_echo "$gcc_cv_as_gnu_unique_object" >&6; }
 if test $gcc_cv_as_gnu_unique_object = yes; then
-  # Also check for ld.so support, i.e. glibc 2.11 or higher.
+  # We need to unquote above to to use the definition from config.gcc.
+# Also check for ld.so support, i.e. glibc 2.11 or higher.
    if test x$host = x$build -a x$host = x$target &&
        ldd --version 2>/dev/null &&
        glibcver=`ldd --version 2>/dev/null | sed 's/.* //;q'`; then
--- a/src/gcc/configure.ac
+++ b/src/gcc/configure.ac
@@ -782,7 +782,7 @@
 )
 AC_SUBST(CONFIGURE_SPECS)
 
-ACX_PKGVERSION([GCC])
+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
 ACX_BUGURL([http://gcc.gnu.org/bugs.html])
 
 # Sanity check enable_languages in case someone does not run the toplevel
@@ -3945,7 +3945,8 @@
   esac],
  [gcc_GAS_CHECK_FEATURE([gnu_unique_object], gcc_cv_as_gnu_unique_object,
    [elf,2,19,52],,
-   [.type foo, @gnu_unique_object],,
+   [.type foo, '$target_type_format_char'gnu_unique_object],,
+# We need to unquote above to to use the definition from config.gcc.
 # Also check for ld.so support, i.e. glibc 2.11 or higher.
    [[if test x$host = x$build -a x$host = x$target &&
        ldd --version 2>/dev/null &&
--- a/src/gcc/cp/typeck2.c
+++ b/src/gcc/cp/typeck2.c
@@ -479,18 +479,20 @@
 
 
 /* The recursive part of split_nonconstant_init.  DEST is an lvalue
-   expression to which INIT should be assigned.  INIT is a CONSTRUCTOR.  */
+   expression to which INIT should be assigned.  INIT is a CONSTRUCTOR.
+   Return true if the whole of the value was initialized by the
+   generated statements.  */
 
-static void
-split_nonconstant_init_1 (tree dest, tree *initp)
+static bool
+split_nonconstant_init_1 (tree dest, tree init)
 {
   unsigned HOST_WIDE_INT idx;
-  tree init = *initp;
   tree field_index, value;
   tree type = TREE_TYPE (dest);
   tree inner_type = NULL;
   bool array_type_p = false;
-  HOST_WIDE_INT num_type_elements, num_initialized_elements;
+  bool complete_p = true;
+  HOST_WIDE_INT num_split_elts = 0;
 
   switch (TREE_CODE (type))
     {
@@ -502,7 +504,6 @@
     case RECORD_TYPE:
     case UNION_TYPE:
     case QUAL_UNION_TYPE:
-      num_initialized_elements = 0;
       FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init), idx,
 				field_index, value)
 	{
@@ -525,13 +526,14 @@
 		sub = build3 (COMPONENT_REF, inner_type, dest, field_index,
 			      NULL_TREE);
 
-	      split_nonconstant_init_1 (sub, &value);
+	      if (!split_nonconstant_init_1 (sub, value))
+		complete_p = false;
+	      num_split_elts++;
 	    }
 	  else if (!initializer_constant_valid_p (value, inner_type))
 	    {
 	      tree code;
 	      tree sub;
-	      HOST_WIDE_INT inner_elements;
 
 	      /* FIXME: Ordered removal is O(1) so the whole function is
 		 worst-case quadratic. This could be fixed using an aside
@@ -555,21 +557,9 @@
 	      code = build_stmt (input_location, EXPR_STMT, code);
 	      add_stmt (code);
 
-	      inner_elements = count_type_elements (inner_type, true);
-	      if (inner_elements < 0)
-		num_initialized_elements = -1;
-	      else if (num_initialized_elements >= 0)
-		num_initialized_elements += inner_elements;
-	      continue;
+	      num_split_elts++;
 	    }
 	}
-
-      num_type_elements = count_type_elements (type, true);
-      /* If all elements of the initializer are non-constant and
-	 have been split out, we don't need the empty CONSTRUCTOR.  */
-      if (num_type_elements > 0
-	  && num_type_elements == num_initialized_elements)
-	*initp = NULL;
       break;
 
     case VECTOR_TYPE:
@@ -581,6 +571,7 @@
 	  code = build2 (MODIFY_EXPR, type, dest, cons);
 	  code = build_stmt (input_location, EXPR_STMT, code);
 	  add_stmt (code);
+	  num_split_elts += CONSTRUCTOR_NELTS (init);
 	}
       break;
 
@@ -590,6 +581,8 @@
 
   /* The rest of the initializer is now a constant. */
   TREE_CONSTANT (init) = 1;
+  return complete_p && complete_ctor_at_level_p (TREE_TYPE (init),
+						 num_split_elts, inner_type);
 }
 
 /* A subroutine of store_init_value.  Splits non-constant static
@@ -605,7 +598,8 @@
   if (TREE_CODE (init) == CONSTRUCTOR)
     {
       code = push_stmt_list ();
-      split_nonconstant_init_1 (dest, &init);
+      if (split_nonconstant_init_1 (dest, init))
+	init = NULL_TREE;
       code = pop_stmt_list (code);
       DECL_INITIAL (dest) = init;
       TREE_READONLY (dest) = 0;
--- a/src/gcc/ddg.c
+++ b/src/gcc/ddg.c
@@ -145,6 +145,45 @@
   return rtx_mem_access_p (PATTERN (insn));
 }
 
+/* Return true if DEF_INSN contains address being auto-inc or auto-dec
+   which is used in USE_INSN.  Otherwise return false.  The result is
+   being used to decide whether to remove the edge between def_insn and
+   use_insn when -fmodulo-sched-allow-regmoves is set.  This function
+   doesn't need to consider the specific address register; no reg_moves
+   will be allowed for any life range defined by def_insn and used
+   by use_insn, if use_insn uses an address register auto-inc'ed by
+   def_insn.  */
+bool
+autoinc_var_is_used_p (rtx def_insn, rtx use_insn)
+{
+  rtx note;
+
+  for (note = REG_NOTES (def_insn); note; note = XEXP (note, 1))
+    if (REG_NOTE_KIND (note) == REG_INC
+	&& reg_referenced_p (XEXP (note, 0), PATTERN (use_insn)))
+      return true;
+
+  return false;
+}
+
+/* Return true if one of the definitions in INSN has MODE_CC.  Otherwise
+   return false.  */
+static bool
+def_has_ccmode_p (rtx insn)
+{
+  df_ref *def;
+
+  for (def = DF_INSN_DEFS (insn); *def; def++)
+    {
+      enum machine_mode mode = GET_MODE (DF_REF_REG (*def));
+
+      if (GET_MODE_CLASS (mode) == MODE_CC)
+	return true;
+    }
+
+  return false;
+}
+
 /* Computes the dependence parameters (latency, distance etc.), creates
    a ddg_edge and adds it to the given DDG.  */
 static void
@@ -173,10 +212,16 @@
      compensate for that by generating reg-moves based on the life-range
      analysis.  The anti-deps that will be deleted are the ones which
      have true-deps edges in the opposite direction (in other words
-     the kernel has only one def of the relevant register).  TODO:
-     support the removal of all anti-deps edges, i.e. including those
+     the kernel has only one def of the relevant register).
+     If the address that is being auto-inc or auto-dec in DEST_NODE
+     is used in SRC_NODE then do not remove the edge to make sure
+     reg-moves will not be created for this address.  
+     TODO: support the removal of all anti-deps edges, i.e. including those
      whose register has multiple defs in the loop.  */
-  if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
+  if (flag_modulo_sched_allow_regmoves 
+      && (t == ANTI_DEP && dt == REG_DEP)
+      && !def_has_ccmode_p (dest_node->insn)
+      && !autoinc_var_is_used_p (dest_node->insn, src_node->insn))
     {
       rtx set;
 
@@ -301,8 +346,16 @@
 
 	  gcc_assert (first_def_node);
 
+         /* Always create the edge if the use node is a branch in
+            order to prevent the creation of reg-moves.  
+            If the address that is being auto-inc or auto-dec in LAST_DEF
+            is used in USE_INSN then do not remove the edge to make sure
+            reg-moves will not be created for that address.  */
           if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
-              || !flag_modulo_sched_allow_regmoves)
+              || !flag_modulo_sched_allow_regmoves
+	      || JUMP_P (use_node->insn)
+              || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
+	      || def_has_ccmode_p (DF_REF_INSN (last_def)))
             create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
                                     REG_DEP, 1);
 
@@ -385,6 +438,33 @@
 			 &PATTERN (insn2));
 }
 
+/* Given two nodes, analyze their RTL insns and add intra-loop mem deps
+   to ddg G.  */
+static void
+add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
+{
+
+  if ((from->cuid == to->cuid)
+      || !insns_may_alias_p (from->insn, to->insn))
+    /* Do not create edge if memory references have disjoint alias sets
+       or 'to' and 'from' are the same instruction.  */
+    return;
+
+  if (mem_write_insn_p (from->insn))
+    {
+      if (mem_read_insn_p (to->insn))
+	create_ddg_dep_no_link (g, from, to,
+				DEBUG_INSN_P (to->insn)
+				? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
+      else
+	create_ddg_dep_no_link (g, from, to,
+				DEBUG_INSN_P (to->insn)
+				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
+    }
+  else if (!mem_read_insn_p (to->insn))
+    create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
+}
+
 /* Given two nodes, analyze their RTL insns and add inter-loop mem deps
    to ddg G.  */
 static void
@@ -472,10 +552,22 @@
 	      if (DEBUG_INSN_P (j_node->insn))
 		continue;
 	      if (mem_access_insn_p (j_node->insn))
- 		/* Don't bother calculating inter-loop dep if an intra-loop dep
-		   already exists.  */
+		{
+		  /* Don't bother calculating inter-loop dep if an intra-loop dep
+		     already exists.  */
 	      	  if (! TEST_BIT (dest_node->successors, j))
 		    add_inter_loop_mem_dep (g, dest_node, j_node);
+		  /* If -fmodulo-sched-allow-regmoves
+		     is set certain anti-dep edges are not created.
+		     It might be that these anti-dep edges are on the
+		     path from one memory instruction to another such that
+		     removing these edges could cause a violation of the
+		     memory dependencies.  Thus we add intra edges between
+		     every two memory instructions in this case.  */
+		  if (flag_modulo_sched_allow_regmoves
+		      && !TEST_BIT (dest_node->predecessors, j))
+		    add_intra_loop_mem_dep (g, j_node, dest_node);
+		}
             }
         }
     }
@@ -1011,6 +1103,7 @@
   for (i = 0; i < all_sccs->num_sccs; i++)
     free_scc (all_sccs->sccs[i]);
 
+  free (all_sccs->sccs);
   free (all_sccs);
 }
 
--- a/src/gcc/ddg.h
+++ b/src/gcc/ddg.h
@@ -186,4 +186,6 @@
 int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
 int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
 
+bool autoinc_var_is_used_p (rtx, rtx);
+
 #endif /* GCC_DDG_H */
--- a/src/gcc/df-problems.c
+++ b/src/gcc/df-problems.c
@@ -3375,7 +3375,7 @@
       while (*mws_rec)
 	{
 	  struct df_mw_hardreg *mws = *mws_rec;
-	  if ((DF_MWS_REG_DEF_P (mws))
+	  if (DF_MWS_REG_USE_P (mws)
 	      && !df_ignore_stack_reg (mws->start_regno))
 	    {
 	      bool really_add_notes = debug_insn != 0;
--- a/src/gcc/dojump.c
+++ b/src/gcc/dojump.c
@@ -36,6 +36,7 @@
 #include "ggc.h"
 #include "basic-block.h"
 #include "output.h"
+#include "tm_p.h"
 
 static bool prefer_and_bit_test (enum machine_mode, int);
 static void do_jump_by_parts_greater (tree, tree, int, rtx, rtx, int);
--- a/src/gcc/expmed.c
+++ b/src/gcc/expmed.c
@@ -657,6 +657,10 @@
       && GET_MODE (value) != BLKmode
       && bitsize > 0
       && GET_MODE_BITSIZE (op_mode) >= bitsize
+      /* Do not use insv for volatile bitfields when
+         -fstrict-volatile-bitfields is in effect.  */
+      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
+	   && flag_strict_volatile_bitfields > 0)
       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
@@ -700,19 +704,21 @@
 	  copy_back = true;
 	}
 
-      /* On big-endian machines, we count bits from the most significant.
-	 If the bit field insn does not, we must invert.  */
-
-      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
-	xbitpos = unit - bitsize - xbitpos;
-
       /* We have been counting XBITPOS within UNIT.
 	 Count instead within the size of the register.  */
-      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
+      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
 	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 
       unit = GET_MODE_BITSIZE (op_mode);
 
+      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
+         "backwards" from the size of the unit we are inserting into.
+	 Otherwise, we count bits from the most significant on a
+	 BYTES/BITS_BIG_ENDIAN machine.  */
+
+      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+	xbitpos = unit - bitsize - xbitpos;
+
       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
       value1 = value;
       if (GET_MODE (value) != op_mode)
@@ -1528,6 +1534,10 @@
   if (ext_mode != MAX_MACHINE_MODE
       && bitsize > 0
       && GET_MODE_BITSIZE (ext_mode) >= bitsize
+      /* Do not use extv/extzv for volatile bitfields when
+         -fstrict-volatile-bitfields is in effect.  */
+      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
+	   && flag_strict_volatile_bitfields > 0)
       /* If op0 is a register, we need it in EXT_MODE to make it
 	 acceptable to the format of ext(z)v.  */
       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
@@ -1552,17 +1562,20 @@
 	/* Get ref to first byte containing part of the field.  */
 	xop0 = adjust_address (xop0, byte_mode, xoffset);
 
-      /* On big-endian machines, we count bits from the most significant.
-	 If the bit field insn does not, we must invert.  */
-      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
-	xbitpos = unit - bitsize - xbitpos;
-
       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
-      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
+      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
 	xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
 
       unit = GET_MODE_BITSIZE (ext_mode);
 
+      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
+         "backwards" from the size of the unit we are extracting from.
+	 Otherwise, we count bits from the most significant on a
+	 BYTES/BITS_BIG_ENDIAN machine.  */
+
+      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+	xbitpos = unit - bitsize - xbitpos;
+
       if (xtarget == 0)
 	xtarget = xspec_target = gen_reg_rtx (tmode);
 
--- a/src/gcc/expr.c
+++ b/src/gcc/expr.c
@@ -1497,7 +1497,7 @@
   if (nregs == 0)
     return;
 
-  if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x))
+  if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x))
     x = validize_mem (force_const_mem (mode, x));
 
   /* See if the machine can do this with a load multiple insn.  */
@@ -2413,7 +2413,7 @@
 		    offset -= size;
 
 		  cst = (*constfun) (constfundata, offset, mode);
-		  if (!LEGITIMATE_CONSTANT_P (cst))
+		  if (!targetm.legitimate_constant_p (mode, cst))
 		    return 0;
 
 		  if (!reverse)
@@ -3468,7 +3468,7 @@
 
       y_cst = y;
 
-      if (!LEGITIMATE_CONSTANT_P (y))
+      if (!targetm.legitimate_constant_p (mode, y))
 	{
 	  y = force_const_mem (mode, y);
 
@@ -3524,7 +3524,7 @@
 
   REAL_VALUE_FROM_CONST_DOUBLE (r, y);
 
-  if (LEGITIMATE_CONSTANT_P (y))
+  if (targetm.legitimate_constant_p (dstmode, y))
     oldcost = rtx_cost (y, SET, speed);
   else
     oldcost = rtx_cost (force_const_mem (dstmode, y), SET, speed);
@@ -3547,7 +3547,7 @@
 
       trunc_y = CONST_DOUBLE_FROM_REAL_VALUE (r, srcmode);
 
-      if (LEGITIMATE_CONSTANT_P (trunc_y))
+      if (targetm.legitimate_constant_p (srcmode, trunc_y))
 	{
 	  /* Skip if the target needs extra instructions to perform
 	     the extension.  */
@@ -3960,7 +3960,7 @@
 	 by setting SKIP to 0.  */
       skip = (reg_parm_stack_space == 0) ? 0 : not_stack;
 
-      if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x))
+      if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x))
 	x = validize_mem (force_const_mem (mode, x));
 
       /* If X is a hard register in a non-integer mode, copy it into a pseudo;
@@ -4976,16 +4976,136 @@
   return NULL_RTX;
 }
 
+/* Return true if field F of structure TYPE is a flexible array.  */
+
+static bool
+flexible_array_member_p (const_tree f, const_tree type)
+{
+  const_tree tf;
+
+  tf = TREE_TYPE (f);
+  return (DECL_CHAIN (f) == NULL
+	  && TREE_CODE (tf) == ARRAY_TYPE
+	  && TYPE_DOMAIN (tf)
+	  && TYPE_MIN_VALUE (TYPE_DOMAIN (tf))
+	  && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf)))
+	  && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf))
+	  && int_size_in_bytes (type) >= 0);
+}
+
+/* If FOR_CTOR_P, return the number of top-level elements that a constructor
+   must have in order for it to completely initialize a value of type TYPE.
+   Return -1 if the number isn't known.
+
+   If !FOR_CTOR_P, return an estimate of the number of scalars in TYPE.  */
+
+static HOST_WIDE_INT
+count_type_elements (const_tree type, bool for_ctor_p)
+{
+  switch (TREE_CODE (type))
+    {
+    case ARRAY_TYPE:
+      {
+	tree nelts;
+
+	nelts = array_type_nelts (type);
+	if (nelts && host_integerp (nelts, 1))
+	  {
+	    unsigned HOST_WIDE_INT n;
+
+	    n = tree_low_cst (nelts, 1) + 1;
+	    if (n == 0 || for_ctor_p)
+	      return n;
+	    else
+	      return n * count_type_elements (TREE_TYPE (type), false);
+	  }
+	return for_ctor_p ? -1 : 1;
+      }
+
+    case RECORD_TYPE:
+      {
+	unsigned HOST_WIDE_INT n;
+	tree f;
+
+	n = 0;
+	for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
+	  if (TREE_CODE (f) == FIELD_DECL)
+	    {
+	      if (!for_ctor_p)
+		n += count_type_elements (TREE_TYPE (f), false);
+	      else if (!flexible_array_member_p (f, type))
+		/* Don't count flexible arrays, which are not supposed
+		   to be initialized.  */
+		n += 1;
+	    }
+
+	return n;
+      }
+
+    case UNION_TYPE:
+    case QUAL_UNION_TYPE:
+      {
+	tree f;
+	HOST_WIDE_INT n, m;
+
+	gcc_assert (!for_ctor_p);
+	/* Estimate the number of scalars in each field and pick the
+	   maximum.  Other estimates would do instead; the idea is simply
+	   to make sure that the estimate is not sensitive to the ordering
+	   of the fields.  */
+	n = 1;
+	for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
+	  if (TREE_CODE (f) == FIELD_DECL)
+	    {
+	      m = count_type_elements (TREE_TYPE (f), false);
+	      /* If the field doesn't span the whole union, add an extra
+		 scalar for the rest.  */
+	      if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)),
+				    TYPE_SIZE (type)) != 1)
+		m++;
+	      if (n < m)
+		n = m;
+	    }
+	return n;
+      }
+
+    case COMPLEX_TYPE:
+      return 2;
+
+    case VECTOR_TYPE:
+      return TYPE_VECTOR_SUBPARTS (type);
+
+    case INTEGER_TYPE:
+    case REAL_TYPE:
+    case FIXED_POINT_TYPE:
+    case ENUMERAL_TYPE:
+    case BOOLEAN_TYPE:
+    case POINTER_TYPE:
+    case OFFSET_TYPE:
+    case REFERENCE_TYPE:
+      return 1;
+
+    case ERROR_MARK:
+      return 0;
+
+    case VOID_TYPE:
+    case METHOD_TYPE:
+    case FUNCTION_TYPE:
+    case LANG_TYPE:
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Helper for categorize_ctor_elements.  Identical interface.  */
 
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
-			    HOST_WIDE_INT *p_elt_count,
-			    bool *p_must_clear)
+			    HOST_WIDE_INT *p_init_elts, bool *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
-  HOST_WIDE_INT nz_elts, elt_count;
-  tree value, purpose;
+  HOST_WIDE_INT nz_elts, init_elts, num_fields;
+  tree value, purpose, elt_type;
 
   /* Whether CTOR is a valid constant initializer, in accordance with what
      initializer_constant_valid_p does.  If inferred from the constructor
@@ -4994,7 +5114,9 @@
   bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
 
   nz_elts = 0;
-  elt_count = 0;
+  init_elts = 0;
+  num_fields = 0;
+  elt_type = NULL_TREE;
 
   FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), idx, purpose, value)
     {
@@ -5009,6 +5131,8 @@
 	    mult = (tree_low_cst (hi_index, 1)
 		    - tree_low_cst (lo_index, 1) + 1);
 	}
+      num_fields += mult;
+      elt_type = TREE_TYPE (value);
 
       switch (TREE_CODE (value))
 	{
@@ -5016,11 +5140,11 @@
 	  {
 	    HOST_WIDE_INT nz = 0, ic = 0;
 
-	    bool const_elt_p
-	      = categorize_ctor_elements_1 (value, &nz, &ic, p_must_clear);
+	    bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic,
+							   p_complete);
 
 	    nz_elts += mult * nz;
- 	    elt_count += mult * ic;
+ 	    init_elts += mult * ic;
 
 	    if (const_from_elts_p && const_p)
 	      const_p = const_elt_p;
@@ -5032,12 +5156,12 @@
 	case FIXED_CST:
 	  if (!initializer_zerop (value))
 	    nz_elts += mult;
-	  elt_count += mult;
+	  init_elts += mult;
 	  break;
 
 	case STRING_CST:
 	  nz_elts += mult * TREE_STRING_LENGTH (value);
-	  elt_count += mult * TREE_STRING_LENGTH (value);
+	  init_elts += mult * TREE_STRING_LENGTH (value);
 	  break;
 
 	case COMPLEX_CST:
@@ -5045,7 +5169,7 @@
 	    nz_elts += mult;
 	  if (!initializer_zerop (TREE_IMAGPART (value)))
 	    nz_elts += mult;
-	  elt_count += mult;
+	  init_elts += mult;
 	  break;
 
 	case VECTOR_CST:
@@ -5055,65 +5179,31 @@
 	      {
 		if (!initializer_zerop (TREE_VALUE (v)))
 		  nz_elts += mult;
-		elt_count += mult;
+		init_elts += mult;
 	      }
 	  }
 	  break;
 
 	default:
 	  {
-	    HOST_WIDE_INT tc = count_type_elements (TREE_TYPE (value), true);
-	    if (tc < 1)
-	      tc = 1;
+	    HOST_WIDE_INT tc = count_type_elements (elt_type, false);
 	    nz_elts += mult * tc;
-	    elt_count += mult * tc;
+	    init_elts += mult * tc;
 
 	    if (const_from_elts_p && const_p)
-	      const_p = initializer_constant_valid_p (value, TREE_TYPE (value))
+	      const_p = initializer_constant_valid_p (value, elt_type)
 			!= NULL_TREE;
 	  }
 	  break;
 	}
     }
 
-  if (!*p_must_clear
-      && (TREE_CODE (TREE_TYPE (ctor)) == UNION_TYPE
-	  || TREE_CODE (TREE_TYPE (ctor)) == QUAL_UNION_TYPE))
-    {
-      tree init_sub_type;
-      bool clear_this = true;
-
-      if (!VEC_empty (constructor_elt, CONSTRUCTOR_ELTS (ctor)))
-	{
-	  /* We don't expect more than one element of the union to be
-	     initialized.  Not sure what we should do otherwise... */
-          gcc_assert (VEC_length (constructor_elt, CONSTRUCTOR_ELTS (ctor))
-		      == 1);
-
-          init_sub_type = TREE_TYPE (VEC_index (constructor_elt,
-						CONSTRUCTOR_ELTS (ctor),
-						0)->value);
-
-	  /* ??? We could look at each element of the union, and find the
-	     largest element.  Which would avoid comparing the size of the
-	     initialized element against any tail padding in the union.
-	     Doesn't seem worth the effort...  */
-	  if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (ctor)),
-				TYPE_SIZE (init_sub_type)) == 1)
-	    {
-	      /* And now we have to find out if the element itself is fully
-		 constructed.  E.g. for union { struct { int a, b; } s; } u
-		 = { .s = { .a = 1 } }.  */
-	      if (elt_count == count_type_elements (init_sub_type, false))
-		clear_this = false;
-	    }
-	}
-
-      *p_must_clear = clear_this;
-    }
+  if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
+						num_fields, elt_type))
+    *p_complete = false;
 
   *p_nz_elts += nz_elts;
-  *p_elt_count += elt_count;
+  *p_init_elts += init_elts;
 
   return const_p;
 }
@@ -5123,111 +5213,50 @@
      and place it in *P_NZ_ELTS;
    * how many scalar fields in total are in CTOR,
      and place it in *P_ELT_COUNT.
-   * if a type is a union, and the initializer from the constructor
-     is not the largest element in the union, then set *p_must_clear.
+   * whether the constructor is complete -- in the sense that every
+     meaningful byte is explicitly given a value --
+     and place it in *P_COMPLETE.
 
    Return whether or not CTOR is a valid static constant initializer, the same
    as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0".  */
 
 bool
 categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
-			  HOST_WIDE_INT *p_elt_count,
-			  bool *p_must_clear)
+			  HOST_WIDE_INT *p_init_elts, bool *p_complete)
 {
   *p_nz_elts = 0;
-  *p_elt_count = 0;
-  *p_must_clear = false;
+  *p_init_elts = 0;
+  *p_complete = true;
 
-  return
-    categorize_ctor_elements_1 (ctor, p_nz_elts, p_elt_count, p_must_clear);
+  return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete);
 }
 
-/* Count the number of scalars in TYPE.  Return -1 on overflow or
-   variable-sized.  If ALLOW_FLEXARR is true, don't count flexible
-   array member at the end of the structure.  */
+/* TYPE is initialized by a constructor with NUM_ELTS elements, the last
+   of which had type LAST_TYPE.  Each element was itself a complete
+   initializer, in the sense that every meaningful byte was explicitly
+   given a value.  Return true if the same is true for the constructor
+   as a whole.  */
 
-HOST_WIDE_INT
-count_type_elements (const_tree type, bool allow_flexarr)
+bool
+complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts,
+			  const_tree last_type)
 {
-  const HOST_WIDE_INT max = ~((HOST_WIDE_INT)1 << (HOST_BITS_PER_WIDE_INT-1));
-  switch (TREE_CODE (type))
+  if (TREE_CODE (type) == UNION_TYPE
+      || TREE_CODE (type) == QUAL_UNION_TYPE)
     {
-    case ARRAY_TYPE:
-      {
-	tree telts = array_type_nelts (type);
-	if (telts && host_integerp (telts, 1))
-	  {
-	    HOST_WIDE_INT n = tree_low_cst (telts, 1) + 1;
-	    HOST_WIDE_INT m = count_type_elements (TREE_TYPE (type), false);
-	    if (n == 0)
-	      return 0;
-	    else if (max / n > m)
-	      return n * m;
-	  }
-	return -1;
-      }
-
-    case RECORD_TYPE:
-      {
-	HOST_WIDE_INT n = 0, t;
-	tree f;
-
-	for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
-	  if (TREE_CODE (f) == FIELD_DECL)
-	    {
-	      t = count_type_elements (TREE_TYPE (f), false);
-	      if (t < 0)
-		{
-		  /* Check for structures with flexible array member.  */
-		  tree tf = TREE_TYPE (f);
-		  if (allow_flexarr
-		      && DECL_CHAIN (f) == NULL
-		      && TREE_CODE (tf) == ARRAY_TYPE
-		      && TYPE_DOMAIN (tf)
-		      && TYPE_MIN_VALUE (TYPE_DOMAIN (tf))
-		      && integer_zerop (TYPE_MIN_VALUE (TYPE_DOMAIN (tf)))
-		      && !TYPE_MAX_VALUE (TYPE_DOMAIN (tf))
-		      && int_size_in_bytes (type) >= 0)
-		    break;
-
-		  return -1;
-		}
-	      n += t;
-	    }
-
-	return n;
-      }
-
-    case UNION_TYPE:
-    case QUAL_UNION_TYPE:
-      return -1;
-
-    case COMPLEX_TYPE:
-      return 2;
-
-    case VECTOR_TYPE:
-      return TYPE_VECTOR_SUBPARTS (type);
-
-    case INTEGER_TYPE:
-    case REAL_TYPE:
-    case FIXED_POINT_TYPE:
-    case ENUMERAL_TYPE:
-    case BOOLEAN_TYPE:
-    case POINTER_TYPE:
-    case OFFSET_TYPE:
-    case REFERENCE_TYPE:
-      return 1;
+      if (num_elts == 0)
+	return false;
 
-    case ERROR_MARK:
-      return 0;
+      gcc_assert (num_elts == 1 && last_type);
 
-    case VOID_TYPE:
-    case METHOD_TYPE:
-    case FUNCTION_TYPE:
-    case LANG_TYPE:
-    default:
-      gcc_unreachable ();
+      /* ??? We could look at each element of the union, and find the
+	 largest element.  Which would avoid comparing the size of the
+	 initialized element against any tail padding in the union.
+	 Doesn't seem worth the effort...  */
+      return simple_cst_equal (TYPE_SIZE (type), TYPE_SIZE (last_type)) == 1;
     }
+
+  return count_type_elements (type, true) == num_elts;
 }
 
 /* Return 1 if EXP contains mostly (3/4)  zeros.  */
@@ -5236,18 +5265,12 @@
 mostly_zeros_p (const_tree exp)
 {
   if (TREE_CODE (exp) == CONSTRUCTOR)
-
     {
-      HOST_WIDE_INT nz_elts, count, elts;
-      bool must_clear;
-
-      categorize_ctor_elements (exp, &nz_elts, &count, &must_clear);
-      if (must_clear)
-	return 1;
-
-      elts = count_type_elements (TREE_TYPE (exp), false);
+      HOST_WIDE_INT nz_elts, init_elts;
+      bool complete_p;
 
-      return nz_elts < elts / 4;
+      categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
+      return !complete_p || nz_elts < init_elts / 4;
     }
 
   return initializer_zerop (exp);
@@ -5259,12 +5282,11 @@
 all_zeros_p (const_tree exp)
 {
   if (TREE_CODE (exp) == CONSTRUCTOR)
-
     {
-      HOST_WIDE_INT nz_elts, count;
-      bool must_clear;
+      HOST_WIDE_INT nz_elts, init_elts;
+      bool complete_p;
 
-      categorize_ctor_elements (exp, &nz_elts, &count, &must_clear);
+      categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
       return nz_elts == 0;
     }
 
@@ -7805,18 +7827,16 @@
 	{
 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
 	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
+	  if (find_widening_optab_handler (this_optab, mode, innermode, 0)
+		!= CODE_FOR_nothing)
 	    {
-	      if (optab_handler (this_optab, mode) != CODE_FOR_nothing)
-		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
-				     EXPAND_NORMAL);
-		  goto binop3;
-		}
+	      if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				 EXPAND_NORMAL);
+	      else
+		expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
+				 EXPAND_NORMAL);
+	      goto binop3;
 	    }
 	}
       /* Check for a multiplication with matching signedness.  */
@@ -7831,10 +7851,10 @@
 	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
 	  this_optab = zextend_p ? umul_widen_optab : smul_widen_optab;
 
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode)
-	      && TREE_CODE (treeop0) != INTEGER_CST)
+	  if (TREE_CODE (treeop0) != INTEGER_CST)
 	    {
-	      if (optab_handler (this_optab, mode) != CODE_FOR_nothing)
+	      if (find_widening_optab_handler (this_optab, mode, innermode, 0)
+		    != CODE_FOR_nothing)
 		{
 		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
 				   EXPAND_NORMAL);
@@ -7842,7 +7862,8 @@
 					       unsignedp, this_optab);
 		  return REDUCE_BIT_FIELD (temp);
 		}
-	      if (optab_handler (other_optab, mode) != CODE_FOR_nothing
+	      if (find_widening_optab_handler (other_optab, mode, innermode, 0)
+		    != CODE_FOR_nothing
 		  && innermode == word_mode)
 		{
 		  rtx htem, hipart;
@@ -8456,6 +8477,19 @@
 	return target;
       }
 
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
+      {
+        tree oprnd0 = treeop0;
+        tree oprnd1 = treeop1;
+
+        expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+        target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
+                                            target, unsignedp);
+        gcc_assert (target);
+        return target;
+      }
+
     case VEC_PACK_TRUNC_EXPR:
     case VEC_PACK_SAT_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
@@ -8737,10 +8771,13 @@
 	  if (code == SSA_NAME
 	      && (g = SSA_NAME_DEF_STMT (ssa_name))
 	      && gimple_code (g) == GIMPLE_CALL)
-	    pmode = promote_function_mode (type, mode, &unsignedp,
-					   TREE_TYPE
-					   (TREE_TYPE (gimple_call_fn (g))),
-					   2);
+	    {
+	      gcc_assert (!gimple_call_internal_p (g));
+	      pmode = promote_function_mode (type, mode, &unsignedp,
+					     TREE_TYPE
+					     (TREE_TYPE (gimple_call_fn (g))),
+					     2);
+	    }
 	  else
 	    pmode = promote_decl_mode (exp, &unsignedp);
 	  gcc_assert (GET_MODE (decl_rtl) == pmode);
@@ -9301,7 +9338,7 @@
 	   constant and we don't need a memory reference.  */
 	if (CONSTANT_P (op0)
 	    && mode2 != BLKmode
-	    && LEGITIMATE_CONSTANT_P (op0)
+	    && targetm.legitimate_constant_p (mode2, op0)
 	    && !must_force_mem)
 	  op0 = force_reg (mode2, op0);
 
--- a/src/gcc/flag-types.h
+++ b/src/gcc/flag-types.h
@@ -106,6 +106,14 @@
 };
 #endif
 
+/* The algorithm used to implement -fsched-pressure.  */
+enum sched_pressure_algorithm
+{
+  SCHED_PRESSURE_NONE,
+  SCHED_PRESSURE_WEIGHTED,
+  SCHED_PRESSURE_MODEL
+};
+
 /* The algorithm used for the integrated register allocator (IRA).  */
 enum ira_algorithm
 {
--- a/src/gcc/fold-const.c
+++ b/src/gcc/fold-const.c
@@ -9239,15 +9239,10 @@
    0 <= N < M as is common.  In general, the precise value of P is unknown.
    M is chosen as large as possible such that constant N can be determined.
 
-   Returns M and sets *RESIDUE to N.
-
-   If ALLOW_FUNC_ALIGN is true, do take functions' DECL_ALIGN_UNIT into
-   account.  This is not always possible due to PR 35705.
- */
+   Returns M and sets *RESIDUE to N.  */
 
 static unsigned HOST_WIDE_INT
-get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue,
-				 bool allow_func_align)
+get_pointer_modulus_and_residue (tree expr, unsigned HOST_WIDE_INT *residue)
 {
   enum tree_code code;
 
@@ -9277,9 +9272,8 @@
 	    }
 	}
 
-      if (DECL_P (expr)
-	  && (allow_func_align || TREE_CODE (expr) != FUNCTION_DECL))
-	return DECL_ALIGN_UNIT (expr);
+      if (DECL_P (expr))
+	return get_object_alignment (expr, ~0U) / BITS_PER_UNIT;
     }
   else if (code == POINTER_PLUS_EXPR)
     {
@@ -9289,8 +9283,7 @@
 
       op0 = TREE_OPERAND (expr, 0);
       STRIP_NOPS (op0);
-      modulus = get_pointer_modulus_and_residue (op0, residue,
-						 allow_func_align);
+      modulus = get_pointer_modulus_and_residue (op0, residue);
 
       op1 = TREE_OPERAND (expr, 1);
       STRIP_NOPS (op1);
@@ -11154,8 +11147,7 @@
 	  unsigned HOST_WIDE_INT modulus, residue;
 	  unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg1);
 
-	  modulus = get_pointer_modulus_and_residue (arg0, &residue,
-						     integer_onep (arg1));
+	  modulus = get_pointer_modulus_and_residue (arg0, &residue);
 
 	  /* This works because modulus is a power of 2.  If this weren't the
 	     case, we'd have to replace it by its greatest power-of-2
--- a/src/gcc/gengtype-lex.c
+++ b/src/gcc/gengtype-lex.c
@@ -55,7 +55,6 @@
 typedef unsigned char flex_uint8_t; 
 typedef unsigned short int flex_uint16_t;
 typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
 
 /* Limits of integral types. */
 #ifndef INT8_MIN
@@ -86,6 +85,8 @@
 #define UINT32_MAX             (4294967295U)
 #endif
 
+#endif /* ! C99 */
+
 #endif /* ! FLEXINT_H */
 
 #ifdef __cplusplus
@@ -142,7 +143,15 @@
 
 /* Size of default input buffer. */
 #ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
 #define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
 #endif
 
 /* The state buf must be large enough to hold one state per character in the main buffer.
@@ -942,7 +951,7 @@
 #define YY_MORE_ADJ 0
 #define YY_RESTORE_YY_MORE_OFFSET
 char *yytext;
-#line 1 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 1 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* -*- indented-text -*- */
 /* Process source files and output type information.
    Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
@@ -964,7 +973,7 @@
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 #define YY_NO_INPUT 1
-#line 25 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 25 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 #include "bconfig.h"
 #include "system.h"
 
@@ -988,7 +997,7 @@
 }
 
 
-#line 991 "gengtype-lex.c"
+#line 1000 "gengtype-lex.c"
 
 #define INITIAL 0
 #define in_struct 1
@@ -1070,7 +1079,12 @@
 
 /* Amount of stuff to slurp up with each read. */
 #ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
 #define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
 #endif
 
 /* Copy whatever the last rule matched to the standard output. */
@@ -1089,7 +1103,7 @@
 	if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
 		{ \
 		int c = '*'; \
-		unsigned n; \
+		size_t n; \
 		for ( n = 0; n < max_size && \
 			     (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
 			buf[n] = (char) c; \
@@ -1174,7 +1188,7 @@
 	register char *yy_cp, *yy_bp;
 	register int yy_act;
     
-#line 59 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 59 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 
   /* Do this on entry to yylex():  */
   *yylval = 0;
@@ -1185,7 +1199,7 @@
     }
 
   /* Things we look for in skipping mode: */
-#line 1188 "gengtype-lex.c"
+#line 1202 "gengtype-lex.c"
 
 	if ( !(yy_init) )
 		{
@@ -1271,7 +1285,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 70 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 70 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return TYPEDEF;
@@ -1283,7 +1297,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 74 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 74 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return STRUCT;
@@ -1295,7 +1309,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 78 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 78 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return UNION;
@@ -1307,7 +1321,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 82 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 82 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return EXTERN;
@@ -1319,7 +1333,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 86 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 86 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return STATIC;
@@ -1331,7 +1345,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 91 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 91 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return DEFVEC_OP;
@@ -1343,7 +1357,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 95 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 95 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return DEFVEC_I;
@@ -1355,7 +1369,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 99 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 99 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   BEGIN(in_struct);
   return DEFVEC_ALLOC;
@@ -1365,19 +1379,19 @@
 
 case 9:
 YY_RULE_SETUP
-#line 107 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 107 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { BEGIN(in_struct_comment); }
 	YY_BREAK
 case 10:
 /* rule 10 can match eol */
 YY_RULE_SETUP
-#line 109 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 109 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { update_lineno (yytext, yyleng); }
 	YY_BREAK
 case 11:
 /* rule 11 can match eol */
 YY_RULE_SETUP
-#line 110 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 110 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { lexer_line.line++; }
 	YY_BREAK
 case 12:
@@ -1386,7 +1400,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 5;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 112 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 112 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* don't care */
 	YY_BREAK
 case 13:
@@ -1395,7 +1409,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 3;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 113 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 113 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return GTY_TOKEN; }
 	YY_BREAK
 case 14:
@@ -1404,7 +1418,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 3;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 114 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 114 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return VEC_TOKEN; }
 	YY_BREAK
 case 15:
@@ -1413,7 +1427,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 5;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 115 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 115 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return UNION; }
 	YY_BREAK
 case 16:
@@ -1422,7 +1436,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 6;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 116 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 116 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return STRUCT; }
 	YY_BREAK
 case 17:
@@ -1431,7 +1445,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 4;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 117 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 117 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return ENUM; }
 	YY_BREAK
 case 18:
@@ -1440,7 +1454,7 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 9;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 118 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 118 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return PTR_ALIAS; }
 	YY_BREAK
 case 19:
@@ -1449,12 +1463,12 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 10;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 119 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 119 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return NESTED_PTR; }
 	YY_BREAK
 case 20:
 YY_RULE_SETUP
-#line 120 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 120 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return NUM; }
 	YY_BREAK
 case 21:
@@ -1463,7 +1477,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 121 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 121 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1);
   return PARAM_IS;
@@ -1474,11 +1488,11 @@
 *yy_cp = (yy_hold_char); /* undo effects of setting up yytext */
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
-#line 127 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 127 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 case 23:
 /* rule 23 can match eol */
 YY_RULE_SETUP
-#line 127 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 127 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   size_t len;
 
@@ -1496,7 +1510,7 @@
 (yy_c_buf_p) = yy_cp -= 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 139 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 139 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   *yylval = XDUPVAR (const char, yytext, yyleng, yyleng+1);
   return ID;
@@ -1505,7 +1519,7 @@
 case 25:
 /* rule 25 can match eol */
 YY_RULE_SETUP
-#line 144 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 144 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng-1);
   return STRING;
@@ -1515,7 +1529,7 @@
 case 26:
 /* rule 26 can match eol */
 YY_RULE_SETUP
-#line 149 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 149 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng-1);
   return ARRAY;
@@ -1524,7 +1538,7 @@
 case 27:
 /* rule 27 can match eol */
 YY_RULE_SETUP
-#line 153 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 153 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   *yylval = XDUPVAR (const char, yytext+1, yyleng-2, yyleng);
   return CHAR;
@@ -1532,24 +1546,24 @@
 	YY_BREAK
 case 28:
 YY_RULE_SETUP
-#line 158 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 158 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return ELLIPSIS; }
 	YY_BREAK
 case 29:
 YY_RULE_SETUP
-#line 159 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 159 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { return yytext[0]; }
 	YY_BREAK
 /* ignore pp-directives */
 case 30:
 /* rule 30 can match eol */
 YY_RULE_SETUP
-#line 162 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 162 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {lexer_line.line++;}
 	YY_BREAK
 case 31:
 YY_RULE_SETUP
-#line 164 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 164 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   error_at_line (&lexer_line, "unexpected character `%s'", yytext);
 }
@@ -1557,30 +1571,30 @@
 
 case 32:
 YY_RULE_SETUP
-#line 169 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 169 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { BEGIN(in_comment); }
 	YY_BREAK
 case 33:
 /* rule 33 can match eol */
 YY_RULE_SETUP
-#line 170 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 170 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { lexer_line.line++; }
 	YY_BREAK
 case 34:
-#line 172 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 172 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 case 35:
 /* rule 35 can match eol */
-#line 173 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 173 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 case 36:
 /* rule 36 can match eol */
 YY_RULE_SETUP
-#line 173 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 173 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* do nothing */
 	YY_BREAK
 case 37:
 /* rule 37 can match eol */
 YY_RULE_SETUP
-#line 174 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 174 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { update_lineno (yytext, yyleng); }
 	YY_BREAK
 case 38:
@@ -1589,21 +1603,21 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 175 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 175 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* do nothing */
 	YY_BREAK
 
 case 39:
 /* rule 39 can match eol */
 YY_RULE_SETUP
-#line 178 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 178 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { lexer_line.line++; }
 	YY_BREAK
 case 40:
-#line 180 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 180 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 case 41:
 YY_RULE_SETUP
-#line 180 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 180 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* do nothing */
 	YY_BREAK
 case 42:
@@ -1612,25 +1626,25 @@
 (yy_c_buf_p) = yy_cp = yy_bp + 1;
 YY_DO_BEFORE_ACTION; /* set up yytext again */
 YY_RULE_SETUP
-#line 181 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 181 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* do nothing */
 	YY_BREAK
 
 case 43:
 YY_RULE_SETUP
-#line 183 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 183 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { BEGIN(INITIAL); } 
 	YY_BREAK
 case 44:
 YY_RULE_SETUP
-#line 184 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 184 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 { BEGIN(in_struct); }
 	YY_BREAK
 case 45:
-#line 187 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 187 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 case 46:
 YY_RULE_SETUP
-#line 187 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 187 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 {
   error_at_line (&lexer_line, 
 		 "unterminated comment or string; unexpected EOF");
@@ -1639,15 +1653,15 @@
 case 47:
 /* rule 47 can match eol */
 YY_RULE_SETUP
-#line 192 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 192 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 /* do nothing */
 	YY_BREAK
 case 48:
 YY_RULE_SETUP
-#line 194 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 194 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 YY_FATAL_ERROR( "flex scanner jammed" );
 	YY_BREAK
-#line 1650 "gengtype-lex.c"
+#line 1664 "gengtype-lex.c"
 case YY_STATE_EOF(INITIAL):
 case YY_STATE_EOF(in_struct):
 case YY_STATE_EOF(in_struct_comment):
@@ -2371,8 +2385,8 @@
 
 /** Setup the input buffer state to scan the given bytes. The next call to yylex() will
  * scan from a @e copy of @a bytes.
- * @param bytes the byte buffer to scan
- * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
  * 
  * @return the newly allocated buffer state object.
  */
@@ -2611,7 +2625,7 @@
 
 #define YYTABLES_NAME "yytables"
 
-#line 194 "/home/jakub/gcc-4.6.4/gcc-4.6.4/gcc/gengtype-lex.l"
+#line 194 "/home/doko/gcc-4.6.4-RC-20130412/gcc-4.6.4-RC-20130412/gcc/gengtype-lex.l"
 
 
 
--- a/src/gcc/genopinit.c
+++ b/src/gcc/genopinit.c
@@ -46,10 +46,12 @@
    used.  $A and $B are replaced with the full name of the mode; $a and $b
    are replaced with the short form of the name, as above.
 
-   If $N is present in the pattern, it means the two modes must be consecutive
-   widths in the same mode class (e.g, QImode and HImode).  $I means that
-   only full integer modes should be considered for the next mode, and $F
-   means that only float modes should be considered.
+   If $N is present in the pattern, it means the two modes must be in
+   the same mode class, and $b must be greater than $a (e.g, QImode
+   and HImode).
+
+   $I means that only full integer modes should be considered for the
+   next mode, and $F means that only float modes should be considered.
    $P means that both full and partial integer modes should be considered.
    $Q means that only fixed-point modes should be considered.
 
@@ -74,6 +76,8 @@
   "set_convert_optab_handler (fractuns_optab, $B, $A, CODE_FOR_$(fractuns$Q$a$I$b2$))",
   "set_convert_optab_handler (satfract_optab, $B, $A, CODE_FOR_$(satfract$a$Q$b2$))",
   "set_convert_optab_handler (satfractuns_optab, $B, $A, CODE_FOR_$(satfractuns$I$a$Q$b2$))",
+  "set_convert_optab_handler (vec_load_lanes_optab, $A, $B, CODE_FOR_$(vec_load_lanes$a$b$))",
+  "set_convert_optab_handler (vec_store_lanes_optab, $A, $B, CODE_FOR_$(vec_store_lanes$a$b$))",
   "set_optab_handler (add_optab, $A, CODE_FOR_$(add$P$a3$))",
   "set_optab_handler (addv_optab, $A, CODE_FOR_$(add$F$a3$)),\n\
     set_optab_handler (add_optab, $A, CODE_FOR_$(add$F$a3$))",
@@ -97,17 +101,17 @@
   "set_optab_handler (smulv_optab, $A, CODE_FOR_$(mulv$I$a3$))",
   "set_optab_handler (umul_highpart_optab, $A, CODE_FOR_$(umul$a3_highpart$))",
   "set_optab_handler (smul_highpart_optab, $A, CODE_FOR_$(smul$a3_highpart$))",
-  "set_optab_handler (smul_widen_optab, $B, CODE_FOR_$(mul$a$b3$)$N)",
-  "set_optab_handler (umul_widen_optab, $B, CODE_FOR_$(umul$a$b3$)$N)",
-  "set_optab_handler (usmul_widen_optab, $B, CODE_FOR_$(usmul$a$b3$)$N)",
-  "set_optab_handler (smadd_widen_optab, $B, CODE_FOR_$(madd$a$b4$)$N)",
-  "set_optab_handler (umadd_widen_optab, $B, CODE_FOR_$(umadd$a$b4$)$N)",
-  "set_optab_handler (ssmadd_widen_optab, $B, CODE_FOR_$(ssmadd$a$b4$)$N)",
-  "set_optab_handler (usmadd_widen_optab, $B, CODE_FOR_$(usmadd$a$b4$)$N)",
-  "set_optab_handler (smsub_widen_optab, $B, CODE_FOR_$(msub$a$b4$)$N)",
-  "set_optab_handler (umsub_widen_optab, $B, CODE_FOR_$(umsub$a$b4$)$N)",
-  "set_optab_handler (ssmsub_widen_optab, $B, CODE_FOR_$(ssmsub$a$b4$)$N)",
-  "set_optab_handler (usmsub_widen_optab, $B, CODE_FOR_$(usmsub$a$b4$)$N)",
+  "set_widening_optab_handler (smul_widen_optab, $B, $A, CODE_FOR_$(mul$a$b3$)$N)",
+  "set_widening_optab_handler (umul_widen_optab, $B, $A, CODE_FOR_$(umul$a$b3$)$N)",
+  "set_widening_optab_handler (usmul_widen_optab, $B, $A, CODE_FOR_$(usmul$a$b3$)$N)",
+  "set_widening_optab_handler (smadd_widen_optab, $B, $A, CODE_FOR_$(madd$a$b4$)$N)",
+  "set_widening_optab_handler (umadd_widen_optab, $B, $A, CODE_FOR_$(umadd$a$b4$)$N)",
+  "set_widening_optab_handler (ssmadd_widen_optab, $B, $A, CODE_FOR_$(ssmadd$a$b4$)$N)",
+  "set_widening_optab_handler (usmadd_widen_optab, $B, $A, CODE_FOR_$(usmadd$a$b4$)$N)",
+  "set_widening_optab_handler (smsub_widen_optab, $B, $A, CODE_FOR_$(msub$a$b4$)$N)",
+  "set_widening_optab_handler (umsub_widen_optab, $B, $A, CODE_FOR_$(umsub$a$b4$)$N)",
+  "set_widening_optab_handler (ssmsub_widen_optab, $B, $A, CODE_FOR_$(ssmsub$a$b4$)$N)",
+  "set_widening_optab_handler (usmsub_widen_optab, $B, $A, CODE_FOR_$(usmsub$a$b4$)$N)",
   "set_optab_handler (sdiv_optab, $A, CODE_FOR_$(div$a3$))",
   "set_optab_handler (ssdiv_optab, $A, CODE_FOR_$(ssdiv$Q$a3$))",
   "set_optab_handler (sdivv_optab, $A, CODE_FOR_$(div$V$I$a3$))",
@@ -264,6 +268,10 @@
   "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))",
   "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))",
   "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))",
+  "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))",
+  "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))",
+  "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))",
+  "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))",
   "set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))",
   "set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))",
   "set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))",
@@ -302,7 +310,7 @@
     {
       int force_float = 0, force_int = 0, force_partial_int = 0;
       int force_fixed = 0;
-      int force_consec = 0;
+      int force_wider = 0;
       int matches = 1;
 
       for (pp = optabs[pindex]; pp[0] != '$' || pp[1] != '('; pp++)
@@ -320,7 +328,7 @@
 	    switch (*++pp)
 	      {
 	      case 'N':
-		force_consec = 1;
+		force_wider = 1;
 		break;
 	      case 'I':
 		force_int = 1;
@@ -389,7 +397,10 @@
 			    || mode_class[i] == MODE_VECTOR_FRACT
 			    || mode_class[i] == MODE_VECTOR_UFRACT
 			    || mode_class[i] == MODE_VECTOR_ACCUM
-			    || mode_class[i] == MODE_VECTOR_UACCUM))
+			    || mode_class[i] == MODE_VECTOR_UACCUM)
+			&& (! force_wider
+			    || *pp == 'a'
+			    || m1 < i))
 		      break;
 		  }
 
@@ -409,8 +420,7 @@
 	}
 
       if (matches && pp[0] == '$' && pp[1] == ')'
-	  && *np == 0
-	  && (! force_consec || (int) GET_MODE_WIDER_MODE(m1) == m2))
+	  && *np == 0)
 	break;
     }
 
--- a/src/gcc/gimple.c
+++ b/src/gcc/gimple.c
@@ -276,6 +276,59 @@
 }
 
 
+/* Helper for gimple_build_call_internal and gimple_build_call_internal_vec.
+   Build the basic components of a GIMPLE_CALL statement to internal
+   function FN with NARGS arguments.  */
+
+static inline gimple
+gimple_build_call_internal_1 (enum internal_fn fn, unsigned nargs)
+{
+  gimple s = gimple_build_with_ops (GIMPLE_CALL, ERROR_MARK, nargs + 3);
+  s->gsbase.subcode |= GF_CALL_INTERNAL;
+  gimple_call_set_internal_fn (s, fn);
+  gimple_call_reset_alias_info (s);
+  return s;
+}
+
+
+/* Build a GIMPLE_CALL statement to internal function FN.  NARGS is
+   the number of arguments.  The ... are the arguments.  */
+
+gimple
+gimple_build_call_internal (enum internal_fn fn, unsigned nargs, ...)
+{
+  va_list ap;
+  gimple call;
+  unsigned i;
+
+  call = gimple_build_call_internal_1 (fn, nargs);
+  va_start (ap, nargs);
+  for (i = 0; i < nargs; i++)
+    gimple_call_set_arg (call, i, va_arg (ap, tree));
+  va_end (ap);
+
+  return call;
+}
+
+
+/* Build a GIMPLE_CALL statement to internal function FN with the arguments
+   specified in vector ARGS.  */
+
+gimple
+gimple_build_call_internal_vec (enum internal_fn fn, VEC(tree, heap) *args)
+{
+  unsigned i, nargs;
+  gimple call;
+
+  nargs = VEC_length (tree, args);
+  call = gimple_build_call_internal_1 (fn, nargs);
+  for (i = 0; i < nargs; i++)
+    gimple_call_set_arg (call, i, VEC_index (tree, args, i));
+
+  return call;
+}
+
+
 /* Build a GIMPLE_CALL statement from CALL_EXPR T.  Note that T is
    assumed to be in GIMPLE form already.  Minimal checking is done of
    this fact.  */
@@ -1774,6 +1827,20 @@
   return (gimple_body (fndecl) || (fn && fn->cfg));
 }
 
+/* Return true if calls C1 and C2 are known to go to the same function.  */
+
+bool
+gimple_call_same_target_p (const_gimple c1, const_gimple c2)
+{
+  if (gimple_call_internal_p (c1))
+    return (gimple_call_internal_p (c2)
+	    && gimple_call_internal_fn (c1) == gimple_call_internal_fn (c2));
+  else
+    return (gimple_call_fn (c1) == gimple_call_fn (c2)
+	    || (gimple_call_fndecl (c1)
+		&& gimple_call_fndecl (c1) == gimple_call_fndecl (c2)));
+}
+
 /* Detect flags from a GIMPLE_CALL.  This is just like
    call_expr_flags, but for gimple tuples.  */
 
@@ -1786,6 +1853,8 @@
 
   if (decl)
     flags = flags_from_decl_or_type (decl);
+  else if (gimple_call_internal_p (stmt))
+    flags = internal_fn_flags (gimple_call_internal_fn (stmt));
   else
     {
       t = TREE_TYPE (gimple_call_fn (stmt));
@@ -1801,18 +1870,35 @@
   return flags;
 }
 
+/* Return the "fn spec" string for call STMT.  */
+
+static tree
+gimple_call_fnspec (const_gimple stmt)
+{
+  tree fn, type, attr;
+
+  fn = gimple_call_fn (stmt);
+  if (!fn)
+    return NULL_TREE;
+
+  type = TREE_TYPE (TREE_TYPE (fn));
+  if (!type)
+    return NULL_TREE;
+
+  attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type));
+  if (!attr)
+    return NULL_TREE;
+
+  return TREE_VALUE (TREE_VALUE (attr));
+}
+
 /* Detects argument flags for argument number ARG on call STMT.  */
 
 int
 gimple_call_arg_flags (const_gimple stmt, unsigned arg)
 {
-  tree type = TREE_TYPE (TREE_TYPE (gimple_call_fn (stmt)));
-  tree attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type));
-  if (!attr)
-    return 0;
-
-  attr = TREE_VALUE (TREE_VALUE (attr));
-  if (1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
+  tree attr = gimple_call_fnspec (stmt);
+  if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr))
     return 0;
 
   switch (TREE_STRING_POINTER (attr)[1 + arg])
@@ -1850,13 +1936,8 @@
   if (gimple_call_flags (stmt) & ECF_MALLOC)
     return ERF_NOALIAS;
 
-  type = TREE_TYPE (TREE_TYPE (gimple_call_fn (stmt)));
-  attr = lookup_attribute ("fn spec", TYPE_ATTRIBUTES (type));
-  if (!attr)
-    return 0;
-
-  attr = TREE_VALUE (TREE_VALUE (attr));
-  if (TREE_STRING_LENGTH (attr) < 1)
+  attr = gimple_call_fnspec (stmt);
+  if (!attr || TREE_STRING_LENGTH (attr) < 1)
     return 0;
 
   switch (TREE_STRING_POINTER (attr)[0])
@@ -2317,14 +2398,15 @@
   if (is_gimple_call (s))
     {
       unsigned nargs = gimple_call_num_args (s);
+      tree fn;
 
       if (!(gimple_call_flags (s) & (ECF_CONST | ECF_PURE)))
         return true;
 
       /* We cannot use gimple_has_volatile_ops here,
          because we must ignore a volatile LHS.  */
-      if (TREE_SIDE_EFFECTS (gimple_call_fn (s))
-          || TREE_THIS_VOLATILE (gimple_call_fn (s)))
+      fn = gimple_call_fn (s);
+      if (fn && (TREE_SIDE_EFFECTS (fn) || TREE_THIS_VOLATILE (fn)))
 	{
 	  gcc_assert (gimple_has_volatile_ops (s));
 	  return true;
@@ -3081,7 +3163,6 @@
 gimple_call_copy_skip_args (gimple stmt, bitmap args_to_skip)
 {
   int i;
-  tree fn = gimple_call_fn (stmt);
   int nargs = gimple_call_num_args (stmt);
   VEC(tree, heap) *vargs = VEC_alloc (tree, heap, nargs);
   gimple new_stmt;
@@ -3090,7 +3171,11 @@
     if (!bitmap_bit_p (args_to_skip, i))
       VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
 
-  new_stmt = gimple_build_call_vec (fn, vargs);
+  if (gimple_call_internal_p (stmt))
+    new_stmt = gimple_build_call_internal_vec (gimple_call_internal_fn (stmt),
+					       vargs);
+  else
+    new_stmt = gimple_build_call_vec (gimple_call_fn (stmt), vargs);
   VEC_free (tree, heap, vargs);
   if (gimple_call_lhs (stmt))
     gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
--- a/src/gcc/gimple.h
+++ b/src/gcc/gimple.h
@@ -30,6 +30,7 @@
 #include "basic-block.h"
 #include "tree-ssa-operands.h"
 #include "tree-ssa-alias.h"
+#include "internal-fn.h"
 
 struct gimple_seq_node_d;
 typedef struct gimple_seq_node_d *gimple_seq_node;
@@ -82,6 +83,8 @@
 			   name, a _DECL, a _REF, etc.  */
 };
 
+#define GF_CALL_INTERNAL_FN_SHIFT 8
+
 /* Specific flags for individual GIMPLE statements.  These flags are
    always stored in gimple_statement_base.subcode and they may only be
    defined for statement codes that do not use sub-codes.
@@ -102,6 +105,8 @@
     GF_CALL_TAILCALL		= 1 << 3,
     GF_CALL_VA_ARG_PACK		= 1 << 4,
     GF_CALL_NOTHROW		= 1 << 5,
+    GF_CALL_INTERNAL		= 1 << 6,
+    GF_CALL_INTERNAL_FN		= 0xff << GF_CALL_INTERNAL_FN_SHIFT,
     GF_OMP_PARALLEL_COMBINED	= 1 << 0,
 
     /* True on an GIMPLE_OMP_RETURN statement if the return does not require
@@ -817,6 +822,8 @@
 
 gimple gimple_build_call_vec (tree, VEC(tree, heap) *);
 gimple gimple_build_call (tree, unsigned, ...);
+gimple gimple_build_call_internal (enum internal_fn, unsigned, ...);
+gimple gimple_build_call_internal_vec (enum internal_fn, VEC(tree, heap) *);
 gimple gimple_build_call_from_tree (tree);
 gimple gimplify_assign (tree, tree, gimple_seq *);
 gimple gimple_build_cond (enum tree_code, tree, tree, tree, tree);
@@ -861,6 +868,7 @@
 void gimple_seq_free (gimple_seq);
 void gimple_seq_add_seq (gimple_seq *, gimple_seq);
 gimple_seq gimple_seq_copy (gimple_seq);
+bool gimple_call_same_target_p (const_gimple, const_gimple);
 int gimple_call_flags (const_gimple);
 int gimple_call_return_flags (const_gimple);
 int gimple_call_arg_flags (const_gimple, unsigned);
@@ -2012,6 +2020,27 @@
 }
 
 
+/* Return true if call GS calls an internal-only function, as enumerated
+   by internal_fn.  */
+
+static inline bool
+gimple_call_internal_p (const_gimple gs)
+{
+  GIMPLE_CHECK (gs, GIMPLE_CALL);
+  return (gs->gsbase.subcode & GF_CALL_INTERNAL) != 0;
+}
+
+
+/* Return the target of internal call GS.  */
+
+static inline enum internal_fn
+gimple_call_internal_fn (const_gimple gs)
+{
+  gcc_assert (gimple_call_internal_p (gs));
+  return (enum internal_fn) (gs->gsbase.subcode >> GF_CALL_INTERNAL_FN_SHIFT);
+}
+
+
 /* Return a pointer to the tree node representing the function called by call
    statement GS.  */
 
@@ -2029,6 +2058,7 @@
 gimple_call_set_fn (gimple gs, tree fn)
 {
   GIMPLE_CHECK (gs, GIMPLE_CALL);
+  gcc_assert (!gimple_call_internal_p (gs));
   gimple_set_op (gs, 1, fn);
 }
 
@@ -2039,10 +2069,23 @@
 gimple_call_set_fndecl (gimple gs, tree decl)
 {
   GIMPLE_CHECK (gs, GIMPLE_CALL);
+  gcc_assert (!gimple_call_internal_p (gs));
   gimple_set_op (gs, 1, build_fold_addr_expr_loc (gimple_location (gs), decl));
 }
 
 
+/* Set internal function FN to be the function called by call statement GS.  */
+
+static inline void
+gimple_call_set_internal_fn (gimple gs, enum internal_fn fn)
+{
+  GIMPLE_CHECK (gs, GIMPLE_CALL);
+  gcc_assert (gimple_call_internal_p (gs));
+  gs->gsbase.subcode &= ~GF_CALL_INTERNAL_FN;
+  gs->gsbase.subcode |= (int) fn << GF_CALL_INTERNAL_FN_SHIFT;
+}
+
+
 /* If a given GIMPLE_CALL's callee is a FUNCTION_DECL, return it.
    Otherwise return NULL.  This function is analogous to
    get_callee_fndecl in tree land.  */
@@ -2051,7 +2094,7 @@
 gimple_call_fndecl (const_gimple gs)
 {
   tree addr = gimple_call_fn (gs);
-  if (TREE_CODE (addr) == ADDR_EXPR)
+  if (addr && TREE_CODE (addr) == ADDR_EXPR)
     {
       tree fndecl = TREE_OPERAND (addr, 0);
       if (TREE_CODE (fndecl) == MEM_REF)
@@ -2073,8 +2116,13 @@
 static inline tree
 gimple_call_return_type (const_gimple gs)
 {
-  tree fn = gimple_call_fn (gs);
-  tree type = TREE_TYPE (fn);
+  tree fn, type;
+
+  fn = gimple_call_fn (gs);
+  if (fn == NULL_TREE)
+    return TREE_TYPE (gimple_call_lhs (gs));
+
+  type = TREE_TYPE (fn);
 
   /* See through the pointer.  */
   type = TREE_TYPE (type);
--- a/src/gcc/gimple-low.c
+++ b/src/gcc/gimple-low.c
@@ -218,6 +218,10 @@
   tree fndecl, parms, p;
   unsigned int i, nargs;
 
+  /* Calls to internal functions always match their signature.  */
+  if (gimple_call_internal_p (stmt))
+    return true;
+
   nargs = gimple_call_num_args (stmt);
 
   /* Get argument types for verification.  */
--- a/src/gcc/gimple-pretty-print.c
+++ b/src/gcc/gimple-pretty-print.c
@@ -343,6 +343,8 @@
     case VEC_EXTRACT_ODD_EXPR:
     case VEC_INTERLEAVE_HIGH_EXPR:
     case VEC_INTERLEAVE_LOW_EXPR:
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
       for (p = tree_code_name [(int) code]; *p; p++)
 	pp_character (buffer, TOUPPER (*p));
       pp_string (buffer, " <");
@@ -596,8 +598,12 @@
 
   if (flags & TDF_RAW)
     {
-      dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T",
-                     gs, gimple_call_fn (gs), lhs);
+      if (gimple_call_internal_p (gs))
+	dump_gimple_fmt (buffer, spc, flags, "%G <%s, %T", gs,
+			 internal_fn_name (gimple_call_internal_fn (gs)), lhs);
+      else
+	dump_gimple_fmt (buffer, spc, flags, "%G <%T, %T",
+			 gs, gimple_call_fn (gs), lhs);
       if (gimple_call_num_args (gs) > 0)
         {
           pp_string (buffer, ", ");
@@ -617,7 +623,10 @@
 
 	  pp_space (buffer);
         }
-      print_call_name (buffer, gimple_call_fn (gs), flags);
+      if (gimple_call_internal_p (gs))
+	pp_string (buffer, internal_fn_name (gimple_call_internal_fn (gs)));
+      else
+	print_call_name (buffer, gimple_call_fn (gs), flags);
       pp_string (buffer, " (");
       dump_gimple_call_args (buffer, gs, flags);
       pp_character (buffer, ')');
--- a/src/gcc/gimplify.c
+++ b/src/gcc/gimplify.c
@@ -3712,9 +3712,8 @@
     case ARRAY_TYPE:
       {
 	struct gimplify_init_ctor_preeval_data preeval_data;
-	HOST_WIDE_INT num_type_elements, num_ctor_elements;
-	HOST_WIDE_INT num_nonzero_elements;
-	bool cleared, valid_const_initializer;
+	HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
+	bool cleared, complete_p, valid_const_initializer;
 
 	/* Aggregate types must lower constructors to initialization of
 	   individual elements.  The exception is that a CONSTRUCTOR node
@@ -3731,7 +3730,7 @@
 	   can only do so if it known to be a valid constant initializer.  */
 	valid_const_initializer
 	  = categorize_ctor_elements (ctor, &num_nonzero_elements,
-				      &num_ctor_elements, &cleared);
+				      &num_ctor_elements, &complete_p);
 
 	/* If a const aggregate variable is being initialized, then it
 	   should never be a lose to promote the variable to be static.  */
@@ -3769,26 +3768,29 @@
 	   parts in, then generate code for the non-constant parts.  */
 	/* TODO.  There's code in cp/typeck.c to do this.  */
 
-	num_type_elements = count_type_elements (type, true);
-
-	/* If count_type_elements could not determine number of type elements
-	   for a constant-sized object, assume clearing is needed.
-	   Don't do this for variable-sized objects, as store_constructor
-	   will ignore the clearing of variable-sized objects.  */
-	if (num_type_elements < 0 && int_size_in_bytes (type) >= 0)
+	if (int_size_in_bytes (TREE_TYPE (ctor)) < 0)
+	  /* store_constructor will ignore the clearing of variable-sized
+	     objects.  Initializers for such objects must explicitly set
+	     every field that needs to be set.  */
+	  cleared = false;
+	else if (!complete_p)
+	  /* If the constructor isn't complete, clear the whole object
+	     beforehand.
+
+	     ??? This ought not to be needed.  For any element not present
+	     in the initializer, we should simply set them to zero.  Except
+	     we'd need to *find* the elements that are not present, and that
+	     requires trickery to avoid quadratic compile-time behavior in
+	     large cases or excessive memory use in small cases.  */
 	  cleared = true;
-	/* If there are "lots" of zeros, then block clear the object first.  */
-	else if (num_type_elements - num_nonzero_elements
+	else if (num_ctor_elements - num_nonzero_elements
 		 > CLEAR_RATIO (optimize_function_for_speed_p (cfun))
-		 && num_nonzero_elements < num_type_elements/4)
-	  cleared = true;
-	/* ??? This bit ought not be needed.  For any element not present
-	   in the initializer, we should simply set them to zero.  Except
-	   we'd need to *find* the elements that are not present, and that
-	   requires trickery to avoid quadratic compile-time behavior in
-	   large cases or excessive memory use in small cases.  */
-	else if (num_ctor_elements < num_type_elements)
+		 && num_nonzero_elements < num_ctor_elements / 4)
+	  /* If there are "lots" of zeros, it's more efficient to clear
+	     the memory and then set the nonzero elements.  */
 	  cleared = true;
+	else
+	  cleared = false;
 
 	/* If there are "lots" of initialized elements, and all of them
 	   are valid address constants, then the entire initializer can
--- a/src/gcc/graphite-sese-to-poly.c
+++ b/src/gcc/graphite-sese-to-poly.c
@@ -1721,7 +1721,7 @@
 
   FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1)
     for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++)
-      if (dr_may_alias_p (dr1, dr2))
+      if (dr_may_alias_p (dr1, dr2, true))
 	edge_num++;
 
   fprintf (file, "$\n");
@@ -1733,7 +1733,7 @@
 
   FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1)
     for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++)
-      if (dr_may_alias_p (dr1, dr2))
+      if (dr_may_alias_p (dr1, dr2, true))
 	fprintf (file, "e %d %d\n", i + 1, j + 1);
 
   return true;
@@ -1763,7 +1763,7 @@
 
   FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1)
     for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++)
-      if (dr_may_alias_p (dr1, dr2))
+      if (dr_may_alias_p (dr1, dr2, true))
 	fprintf (file, "n%d n%d\n", i, j);
 
   return true;
@@ -1789,7 +1789,7 @@
 
   FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1)
     for (j = i + 1; VEC_iterate (data_reference_p, drs, j, dr2); j++)
-      if (dr_may_alias_p (dr1, dr2))
+      if (dr_may_alias_p (dr1, dr2, true))
 	fprintf (file, "%d %d\n", i, j);
 
   return true;
@@ -1825,7 +1825,7 @@
 
   FOR_EACH_VEC_ELT (data_reference_p, drs, i, dr1)
     for (j = i+1; VEC_iterate (data_reference_p, drs, j, dr2); j++)
-      if (dr_may_alias_p (dr1, dr2))
+      if (dr_may_alias_p (dr1, dr2, true))
 	{
 	  add_edge (g, i, j);
 	  add_edge (g, j, i);
--- a/src/gcc/haifa-sched.c
+++ b/src/gcc/haifa-sched.c
@@ -348,6 +348,14 @@
 /* Create empty basic block after the specified block.  */
 basic_block (* sched_create_empty_bb) (basic_block);
 
+/* Return the number of cycles until INSN is expected to be ready.
+   Return zero if it already is.  */
+static int
+insn_delay (rtx insn)
+{
+  return MAX (INSN_TICK (insn) - clock_var, 0);
+}
+
 static int
 may_trap_exp (const_rtx x, int is_store)
 {
@@ -571,10 +579,10 @@
 
 /* Do register pressure sensitive insn scheduling if the flag is set
    up.  */
-bool sched_pressure_p;
+enum sched_pressure_algorithm sched_pressure;
 
 /* Map regno -> its cover class.  The map defined only when
-   SCHED_PRESSURE_P is true.  */
+   SCHED_PRESSURE != SCHED_PRESSURE_NONE.  */
 enum reg_class *sched_regno_cover_class;
 
 /* The current register pressure.  Only elements corresponding cover
@@ -602,10 +610,12 @@
   bitmap_clear (region_ref_regs);
 }
 
-/* Update current register pressure related info after birth (if
-   BIRTH_P) or death of register REGNO.  */
-static void
-mark_regno_birth_or_death (int regno, bool birth_p)
+/* PRESSURE[CL] describes the pressure on register class CL.  Update it
+   for the birth (if BIRTH_P) or death (if !BIRTH_P) of register REGNO.
+   LIVE tracks the set of live registers; if it is null, assume that
+   every birth or death is genuine.  */
+static inline void
+mark_regno_birth_or_death (bitmap live, int *pressure, int regno, bool birth_p)
 {
   enum reg_class cover_class;
 
@@ -616,15 +626,17 @@
 	{
 	  if (birth_p)
 	    {
-	      bitmap_set_bit (curr_reg_live, regno);
-	      curr_reg_pressure[cover_class]
-		+= ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)];
+	      if (!live || bitmap_set_bit (live, regno))
+		pressure[cover_class]
+		  += (ira_reg_class_nregs
+		      [cover_class][PSEUDO_REGNO_MODE (regno)]);
 	    }
 	  else
 	    {
-	      bitmap_clear_bit (curr_reg_live, regno);
-	      curr_reg_pressure[cover_class]
-		-= ira_reg_class_nregs[cover_class][PSEUDO_REGNO_MODE (regno)];
+	      if (!live || bitmap_clear_bit (live, regno))
+		pressure[cover_class]
+		  -= (ira_reg_class_nregs
+		      [cover_class][PSEUDO_REGNO_MODE (regno)]);
 	    }
 	}
     }
@@ -633,13 +645,13 @@
     {
       if (birth_p)
 	{
-	  bitmap_set_bit (curr_reg_live, regno);
-	  curr_reg_pressure[cover_class]++;
+	  if (!live || bitmap_set_bit (live, regno))
+	    pressure[cover_class]++;
 	}
       else
 	{
-	  bitmap_clear_bit (curr_reg_live, regno);
-	  curr_reg_pressure[cover_class]--;
+	  if (!live || bitmap_clear_bit (live, regno))
+	    pressure[cover_class]--;
 	}
     }
 }
@@ -657,8 +669,10 @@
     curr_reg_pressure[ira_reg_class_cover[i]] = 0;
   bitmap_clear (curr_reg_live);
   EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi)
-    if (current_nr_blocks == 1 || bitmap_bit_p (region_ref_regs, j))
-      mark_regno_birth_or_death (j, true);
+    if (sched_pressure == SCHED_PRESSURE_MODEL
+	|| current_nr_blocks == 1
+	|| bitmap_bit_p (region_ref_regs, j))
+      mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, j, true);
 }
 
 /* Mark registers in X as mentioned in the current region.  */
@@ -712,7 +726,8 @@
 	if (regno == INVALID_REGNUM)
 	  break;
 	if (! bitmap_bit_p (df_get_live_in (bb), regno))
-	  mark_regno_birth_or_death (regno, true);
+	  mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
+				     regno, true);
       }
 #endif
 }
@@ -956,19 +971,19 @@
   return true;
 }
 
-/* Compute the number of nondebug forward deps of an insn.  */
+/* Compute the number of nondebug deps in list LIST for INSN.  */
 
 static int
-dep_list_size (rtx insn)
+dep_list_size (rtx insn, sd_list_types_def list)
 {
   sd_iterator_def sd_it;
   dep_t dep;
   int dbgcount = 0, nodbgcount = 0;
 
   if (!MAY_HAVE_DEBUG_INSNS)
-    return sd_lists_size (insn, SD_LIST_FORW);
+    return sd_lists_size (insn, list);
 
-  FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
+  FOR_EACH_DEP (insn, list, sd_it, dep)
     {
       if (DEBUG_INSN_P (DEP_CON (dep)))
 	dbgcount++;
@@ -976,7 +991,7 @@
 	nodbgcount++;
     }
 
-  gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, SD_LIST_FORW));
+  gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, list));
 
   return nodbgcount;
 }
@@ -995,7 +1010,7 @@
     {
       int this_priority = -1;
 
-      if (dep_list_size (insn) == 0)
+      if (dep_list_size (insn, SD_LIST_FORW) == 0)
 	/* ??? We should set INSN_PRIORITY to insn_cost when and insn has
 	   some forward deps but all of them are ignored by
 	   contributes_to_priority hook.  At the moment we set priority of
@@ -1091,6 +1106,22 @@
          qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); }  \
 while (0)
 
+/* For each cover class CL, set DEATH[CL] to the number of registers
+   in that class that die in INSN.  */
+
+static void
+calculate_reg_deaths (rtx insn, int *death)
+{
+  int i;
+  struct reg_use_data *use;
+
+  for (i = 0; i < ira_reg_class_cover_size; i++)
+    death[ira_reg_class_cover[i]] = 0;
+  for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
+    if (dying_use_p (use))
+      mark_regno_birth_or_death (0, death, use->regno, true);
+}
+
 /* Setup info about the current register pressure impact of scheduling
    INSN at the current scheduling point.  */
 static void
@@ -1102,23 +1133,12 @@
   enum reg_class cl;
   struct reg_pressure_data *pressure_info;
   int *max_reg_pressure;
-  struct reg_use_data *use;
   static int death[N_REG_CLASSES];
 
   gcc_checking_assert (!DEBUG_INSN_P (insn));
 
   excess_cost_change = 0;
-  for (i = 0; i < ira_reg_class_cover_size; i++)
-    death[ira_reg_class_cover[i]] = 0;
-  for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
-    if (dying_use_p (use))
-      {
-	cl = sched_regno_cover_class[use->regno];
-	if (use->regno < FIRST_PSEUDO_REGISTER)
-	  death[cl]++;
-	else
-	  death[cl] += ira_reg_class_nregs[cl][PSEUDO_REGNO_MODE (use->regno)];
-      }
+  calculate_reg_deaths (insn, death);
   pressure_info = INSN_REG_PRESSURE (insn);
   max_reg_pressure = INSN_MAX_REG_PRESSURE (insn);
   gcc_assert (pressure_info != NULL && max_reg_pressure != NULL);
@@ -1139,7 +1159,765 @@
     }
   INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insn) = excess_cost_change;
 }
+
+/* This is the first page of code related to SCHED_PRESSURE_MODEL.
+   It tries to make the scheduler take register pressure into account
+   without introducing too many unnecessary stalls.  It hooks into the
+   main scheduling algorithm at several points:
+
+    - Before scheduling starts, model_start_schedule constructs a
+      "model schedule" for the current block.  This model schedule is
+      chosen solely to keep register pressure down.  It does not take the
+      target's pipeline or the original instruction order into account,
+      except as a tie-breaker.  It also doesn't work to a particular
+      pressure limit.
+
+      This model schedule gives us an idea of what pressure can be
+      achieved for the block gives us an example of a schedule that
+      keeps to that pressure.  It also makes the final schedule less
+      dependent on the original instruction order.  This is important
+      because the original order can either be "wide" (many values live
+      at once, such as in user-scheduled code) or "narrow" (few values
+      live at once, such as after loop unrolling, where several
+      iterations are executed sequentially).
+
+      We do not apply this model schedule to the rtx stream.  We simply
+      record it in model_schedule.  We also compute the maximum pressure,
+      MP, that was seen during this schedule.
+
+    - Instructions are added to the ready queue even if they require
+      a stall.  The length of the stall is instead computed as:
+
+	 MAX (INSN_TICK (INSN) - clock_var, 0)
+
+      (= insn_delay).  This allows rank_for_schedule to choose between
+      introducing a deliberate stall or increasing pressure.
+
+    - Before sorting the ready queue, model_set_excess_costs assigns
+      a pressure-based cost to each ready instruction in the queue.
+      This is the instruction's INSN_REG_PRESSURE_EXCESS_COST_CHANGE
+      (ECC for short) and is effectively measured in cycles.
+
+    - rank_for_schedule ranks instructions based on:
+
+	ECC (insn) + insn_delay (insn)
+
+      then as:
+
+	insn_delay (insn)
+
+      So, for example, an instruction X1 with an ECC of 1 that can issue
+      now will win over an instruction X0 with an ECC of zero that would
+      introduce a stall of one cycle.  However, an instruction X2 with an
+      ECC of 2 that can issue now will lose to X0.
+
+    - When an instruction is scheduled, model_recompute updates the model
+      schedule with the new pressures (some of which might now exceed the
+      original maximum pressure MP).  model_update_limit_points then searches
+      for the new point of maximum pressure, if not already known.  */
+
+/* Used to separate high-verbosity debug information for SCHED_PRESSURE_MODEL
+   from surrounding debug information.  */
+#define MODEL_BAR \
+  ";;\t\t+------------------------------------------------------\n"
+
+/* Information about the pressure on a particular register class at a
+   particular point of the model schedule.  */
+struct model_pressure_data {
+  /* The pressure at this point of the model schedule, or -1 if the
+     point is associated with an instruction that has already been
+     scheduled.  */
+  int ref_pressure;
+
+  /* The maximum pressure during or after this point of the model schedule.  */
+  int max_pressure;
+};
+
+/* Per-instruction information that is used while building the model
+   schedule.  Here, "schedule" refers to the model schedule rather
+   than the main schedule.  */
+struct model_insn_info {
+  /* The instruction itself.  */
+  rtx insn;
+
+  /* If this instruction is in model_worklist, these fields link to the
+     previous (higher-priority) and next (lower-priority) instructions
+     in the list.  */
+  struct model_insn_info *prev;
+  struct model_insn_info *next;
+
+  /* While constructing the schedule, QUEUE_INDEX describes whether an
+     instruction has already been added to the schedule (QUEUE_SCHEDULED),
+     is in model_worklist (QUEUE_READY), or neither (QUEUE_NOWHERE).
+     old_queue records the value that QUEUE_INDEX had before scheduling
+     started, so that we can restore it once the schedule is complete.  */
+  int old_queue;
+
+  /* The relative importance of an unscheduled instruction.  Higher
+     values indicate greater importance.  */
+  unsigned int model_priority;
+
+  /* The length of the longest path of satisfied true dependencies
+     that leads to this instruction.  */
+  unsigned int depth;
+
+  /* The length of the longest path of dependencies of any kind
+     that leads from this instruction.  */
+  unsigned int alap;
+
+  /* The number of predecessor nodes that must still be scheduled.  */
+  int unscheduled_preds;
+};
+
+/* Information about the pressure limit for a particular register class.
+   This structure is used when applying a model schedule to the main
+   schedule.  */
+struct model_pressure_limit {
+  /* The maximum register pressure seen in the original model schedule.  */
+  int orig_pressure;
+
+  /* The maximum register pressure seen in the current model schedule
+     (which excludes instructions that have already been scheduled).  */
+  int pressure;
+
+  /* The point of the current model schedule at which PRESSURE is first
+     reached.  It is set to -1 if the value needs to be recomputed.  */
+  int point;
+};
+
+/* Describes a particular way of measuring register pressure.  */
+struct model_pressure_group {
+  /* Index CCI describes the maximum pressure on ira_reg_class_cover[CCI].  */
+  struct model_pressure_limit limits[N_REG_CLASSES];
+
+  /* Index (POINT * ira_num_pressure_classes + CCI) describes the pressure
+     on register class ira_reg_class_cover[CCI] at point POINT of the
+     current model schedule.  A POINT of model_num_insns describes the
+     pressure at the end of the schedule.  */
+  struct model_pressure_data *model;
+};
+
+/* Index POINT gives the instruction at point POINT of the model schedule.
+   This array doesn't change during main scheduling.  */
+static VEC (rtx, heap) *model_schedule;
+
+/* The list of instructions in the model worklist, sorted in order of
+   decreasing priority.  */
+static struct model_insn_info *model_worklist;
+
+/* Index I describes the instruction with INSN_LUID I.  */
+static struct model_insn_info *model_insns;
+
+/* The number of instructions in the model schedule.  */
+static int model_num_insns;
+
+/* The index of the first instruction in model_schedule that hasn't yet been
+   added to the main schedule, or model_num_insns if all of them have.  */
+static int model_curr_point;
+
+/* Describes the pressure before each instruction in the model schedule.  */
+static struct model_pressure_group model_before_pressure;
+
+/* The first unused model_priority value (as used in model_insn_info).  */
+static unsigned int model_next_priority;
+
+
+/* The model_pressure_data for ira_reg_class_cover[CCI] in GROUP
+   at point POINT of the model schedule.  */
+#define MODEL_PRESSURE_DATA(GROUP, POINT, CCI) \
+  (&(GROUP)->model[(POINT) * ira_reg_class_cover_size + (CCI)])
+
+/* The maximum pressure on ira_reg_class_cover[CCI] in GROUP at or
+   after point POINT of the model schedule.  */
+#define MODEL_MAX_PRESSURE(GROUP, POINT, CCI) \
+  (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->max_pressure)
+
+/* The pressure on ira_reg_class_cover[CCI] in GROUP at point POINT
+   of the model schedule.  */
+#define MODEL_REF_PRESSURE(GROUP, POINT, CCI) \
+  (MODEL_PRESSURE_DATA (GROUP, POINT, CCI)->ref_pressure)
+
+/* Information about INSN that is used when creating the model schedule.  */
+#define MODEL_INSN_INFO(INSN) \
+  (&model_insns[INSN_LUID (INSN)])
+
+/* The instruction at point POINT of the model schedule.  */
+#define MODEL_INSN(POINT) \
+  (VEC_index (rtx, model_schedule, POINT))
+
+
+/* Return INSN's index in the model schedule, or model_num_insns if it
+   doesn't belong to that schedule.  */
+
+static int
+model_index (rtx insn)
+{
+  if (INSN_MODEL_INDEX (insn) == 0)
+    return model_num_insns;
+  return INSN_MODEL_INDEX (insn) - 1;
+}
+
+/* Make sure that GROUP->limits is up-to-date for the current point
+   of the model schedule.  */
+
+static void
+model_update_limit_points_in_group (struct model_pressure_group *group)
+{
+  int cci, max_pressure, point;
+
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      /* We may have passed the final point at which the pressure in
+	 group->limits[cci].pressure was reached.  Update the limit if so.  */
+      max_pressure = MODEL_MAX_PRESSURE (group, model_curr_point, cci);
+      group->limits[cci].pressure = max_pressure;
+
+      /* Find the point at which MAX_PRESSURE is first reached.  We need
+	 to search in three cases:
+
+	 - We've already moved past the previous pressure point.
+	   In this case we search forward from model_curr_point.
+
+	 - We scheduled the previous point of maximum pressure ahead of
+	   its position in the model schedule, but doing so didn't bring
+	   the pressure point earlier.  In this case we search forward
+	   from that previous pressure point.
+
+	 - Scheduling an instruction early caused the maximum pressure
+	   to decrease.  In this case we will have set the pressure
+	   point to -1, and we search forward from model_curr_point.  */
+      point = MAX (group->limits[cci].point, model_curr_point);
+      while (point < model_num_insns
+	     && MODEL_REF_PRESSURE (group, point, cci) < max_pressure)
+	point++;
+      group->limits[cci].point = point;
+
+      gcc_assert (MODEL_REF_PRESSURE (group, point, cci) == max_pressure);
+      gcc_assert (MODEL_MAX_PRESSURE (group, point, cci) == max_pressure);
+    }
+}
+
+/* Make sure that all register-pressure limits are up-to-date for the
+   current position in the model schedule.  */
+
+static void
+model_update_limit_points (void)
+{
+  model_update_limit_points_in_group (&model_before_pressure);
+}
+
+/* Return the model_index of the last unscheduled use in chain USE
+   outside of USE's instruction.  Return -1 if there are no other uses,
+   or model_num_insns if the register is live at the end of the block.  */
+
+static int
+model_last_use_except (struct reg_use_data *use)
+{
+  struct reg_use_data *next;
+  int last, index;
+
+  last = -1;
+  for (next = use->next_regno_use; next != use; next = next->next_regno_use)
+    if (NONDEBUG_INSN_P (next->insn)
+	&& QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED)
+      {
+	index = model_index (next->insn);
+	if (index == model_num_insns)
+	  return model_num_insns;
+	if (last < index)
+	  last = index;
+      }
+  return last;
+}
+
+/* An instruction with model_index POINT has just been scheduled, and it
+   adds DELTA to the pressure on ira_reg_class_cover[CCI] after POINT - 1.
+   Update MODEL_REF_PRESSURE (GROUP, POINT, CCI) and
+   MODEL_MAX_PRESSURE (GROUP, POINT, CCI) accordingly.  */
+
+static void
+model_start_update_pressure (struct model_pressure_group *group,
+			     int point, int cci, int delta)
+{
+  int next_max_pressure;
+
+  if (point == model_num_insns)
+    {
+      /* The instruction wasn't part of the model schedule; it was moved
+	 from a different block.  Update the pressure for the end of
+	 the model schedule.  */
+      MODEL_REF_PRESSURE (group, point, cci) += delta;
+      MODEL_MAX_PRESSURE (group, point, cci) += delta;
+    }
+  else
+    {
+      /* Record that this instruction has been scheduled.  Nothing now
+	 changes between POINT and POINT + 1, so get the maximum pressure
+	 from the latter.  If the maximum pressure decreases, the new
+	 pressure point may be before POINT.  */
+      MODEL_REF_PRESSURE (group, point, cci) = -1;
+      next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci);
+      if (MODEL_MAX_PRESSURE (group, point, cci) > next_max_pressure)
+	{
+	  MODEL_MAX_PRESSURE (group, point, cci) = next_max_pressure;
+	  if (group->limits[cci].point == point)
+	    group->limits[cci].point = -1;
+	}
+    }
+}
+
+/* Record that scheduling a later instruction has changed the pressure
+   at point POINT of the model schedule by DELTA (which might be 0).
+   Update GROUP accordingly.  Return nonzero if these changes might
+   trigger changes to previous points as well.  */
+
+static int
+model_update_pressure (struct model_pressure_group *group,
+		       int point, int cci, int delta)
+{
+  int ref_pressure, max_pressure, next_max_pressure;
+
+  /* If POINT hasn't yet been scheduled, update its pressure.  */
+  ref_pressure = MODEL_REF_PRESSURE (group, point, cci);
+  if (ref_pressure >= 0 && delta != 0)
+    {
+      ref_pressure += delta;
+      MODEL_REF_PRESSURE (group, point, cci) = ref_pressure;
+
+      /* Check whether the maximum pressure in the overall schedule
+	 has increased.  (This means that the MODEL_MAX_PRESSURE of
+	 every point <= POINT will need to increae too; see below.)  */
+      if (group->limits[cci].pressure < ref_pressure)
+	group->limits[cci].pressure = ref_pressure;
+
+      /* If we are at maximum pressure, and the maximum pressure
+	 point was previously unknown or later than POINT,
+	 bring it forward.  */
+      if (group->limits[cci].pressure == ref_pressure
+	  && !IN_RANGE (group->limits[cci].point, 0, point))
+	group->limits[cci].point = point;
+
+      /* If POINT used to be the point of maximum pressure, but isn't
+	 any longer, we need to recalculate it using a forward walk.  */
+      if (group->limits[cci].pressure > ref_pressure
+	  && group->limits[cci].point == point)
+	group->limits[cci].point = -1;
+    }
+
+  /* Update the maximum pressure at POINT.  Changes here might also
+     affect the maximum pressure at POINT - 1.  */
+  next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, cci);
+  max_pressure = MAX (ref_pressure, next_max_pressure);
+  if (MODEL_MAX_PRESSURE (group, point, cci) != max_pressure)
+    {
+      MODEL_MAX_PRESSURE (group, point, cci) = max_pressure;
+      return 1;
+    }
+  return 0;
+}
+
+/* INSN has just been scheduled.  Update the model schedule accordingly.  */
+
+static void
+model_recompute (rtx insn)
+{
+  struct {
+    int last_use;
+    int regno;
+  } uses[FIRST_PSEUDO_REGISTER + MAX_RECOG_OPERANDS];
+  struct reg_use_data *use;
+  struct reg_pressure_data *reg_pressure;
+  int delta[N_REG_CLASSES];
+  int cci, point, mix, new_last, cl, ref_pressure, queue;
+  unsigned int i, num_uses, num_pending_births;
+  bool print_p;
+
+  /* The destinations of INSN were previously live from POINT onwards, but are
+     now live from model_curr_point onwards.  Set up DELTA accordingly.  */
+  point = model_index (insn);
+  reg_pressure = INSN_REG_PRESSURE (insn);
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      delta[cl] = reg_pressure[cci].set_increase;
+    }
+
+  /* Record which registers previously died at POINT, but which now die
+     before POINT.  Adjust DELTA so that it represents the effect of
+     this change after POINT - 1.  Set NUM_PENDING_BIRTHS to the number of
+     registers that will be born in the range [model_curr_point, POINT).  */
+  num_uses = 0;
+  num_pending_births = 0;
+  for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
+    {
+      new_last = model_last_use_except (use);
+      if (new_last < point)
+	{
+	  gcc_assert (num_uses < ARRAY_SIZE (uses));
+	  uses[num_uses].last_use = new_last;
+	  uses[num_uses].regno = use->regno;
+	  /* This register is no longer live after POINT - 1.  */
+	  mark_regno_birth_or_death (NULL, delta, use->regno, false);
+	  num_uses++;
+	  if (new_last >= 0)
+	    num_pending_births++;
+	}
+    }
+
+  /* Update the MODEL_REF_PRESSURE and MODEL_MAX_PRESSURE for POINT.
+     Also set each group pressure limit for POINT.  */
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      model_start_update_pressure (&model_before_pressure,
+				   point, cci, delta[cl]);
+    }
+
+  /* Walk the model schedule backwards, starting immediately before POINT.  */
+  print_p = false;
+  if (point != model_curr_point)
+    do
+      {
+	point--;
+	insn = MODEL_INSN (point);
+	queue = QUEUE_INDEX (insn);
 
+	if (queue != QUEUE_SCHEDULED)
+	  {
+	    /* DELTA describes the effect of the move on the register pressure
+	       after POINT.  Make it describe the effect on the pressure
+	       before POINT.  */
+	    i = 0;
+	    while (i < num_uses)
+	      {
+		if (uses[i].last_use == point)
+		  {
+		    /* This register is now live again.  */
+		    mark_regno_birth_or_death (NULL, delta,
+					       uses[i].regno, true);
+
+		    /* Remove this use from the array.  */
+		    uses[i] = uses[num_uses - 1];
+		    num_uses--;
+		    num_pending_births--;
+		  }
+		else
+		  i++;
+	      }
+
+	    if (sched_verbose >= 5)
+	      {
+		char buf[2048];
+
+		if (!print_p)
+		  {
+		    fprintf (sched_dump, MODEL_BAR);
+		    fprintf (sched_dump, ";;\t\t| New pressure for model"
+			     " schedule\n");
+		    fprintf (sched_dump, MODEL_BAR);
+		    print_p = true;
+		  }
+
+		print_pattern (buf, PATTERN (insn), 0);
+		fprintf (sched_dump, ";;\t\t| %3d %4d %-30s ",
+			 point, INSN_UID (insn), buf);
+		for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+		  {
+		    cl = ira_reg_class_cover[cci];
+		    ref_pressure = MODEL_REF_PRESSURE (&model_before_pressure,
+						       point, cci);
+		    fprintf (sched_dump, " %s:[%d->%d]",
+			     reg_class_names[ira_reg_class_cover[cci]],
+			     ref_pressure, ref_pressure + delta[cl]);
+		  }
+		fprintf (sched_dump, "\n");
+	      }
+	  }
+
+	/* Adjust the pressure at POINT.  Set MIX to nonzero if POINT - 1
+	   might have changed as well.  */
+	mix = num_pending_births;
+	for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+	  {
+	    cl = ira_reg_class_cover[cci];
+	    mix |= delta[cl];
+	    mix |= model_update_pressure (&model_before_pressure,
+					  point, cci, delta[cl]);
+	  }
+      }
+    while (mix && point > model_curr_point);
+
+  if (print_p)
+    fprintf (sched_dump, MODEL_BAR);
+}
+
+/* model_spill_cost (CL, P, P') returns the cost of increasing the
+   pressure on CL from P to P'.  We use this to calculate a "base ECC",
+   baseECC (CL, X), for each cover class CL and each instruction X.
+   Supposing X changes the pressure on CL from P to P', and that the
+   maximum pressure on CL in the current model schedule is MP', then:
+
+   * if X occurs before or at the next point of maximum pressure in
+     the model schedule and P' > MP', then:
+
+       baseECC (CL, X) = model_spill_cost (CL, MP, P')
+
+     The idea is that the pressure after scheduling a fixed set of
+     instructions -- in this case, the set up to and including the
+     next maximum pressure point -- is going to be the same regardless
+     of the order; we simply want to keep the intermediate pressure
+     under control.  Thus X has a cost of zero unless scheduling it
+     now would exceed MP'.
+
+     If all increases in the set are by the same amount, no zero-cost
+     instruction will ever cause the pressure to exceed MP'.  However,
+     if X is instead moved past an instruction X' with pressure in the
+     range (MP' - (P' - P), MP'), the pressure at X' will increase
+     beyond MP'.  Since baseECC is very much a heuristic anyway,
+     it doesn't seem worth the overhead of tracking cases like these.
+
+     The cost of exceeding MP' is always based on the original maximum
+     pressure MP.  This is so that going 2 registers over the original
+     limit has the same cost regardless of whether it comes from two
+     separate +1 deltas or from a single +2 delta.
+
+   * if X occurs after the next point of maximum pressure in the model
+     schedule and P' > P, then:
+
+       baseECC (CL, X) = model_spill_cost (CL, MP, MP' + (P' - P))
+
+     That is, if we move X forward across a point of maximum pressure,
+     and if X increases the pressure by P' - P, then we conservatively
+     assume that scheduling X next would increase the maximum pressure
+     by P' - P.  Again, the cost of doing this is based on the original
+     maximum pressure MP, for the same reason as above.
+
+   * if P' < P, P > MP, and X occurs at or after the next point of
+     maximum pressure, then:
+
+       baseECC (CL, X) = -model_spill_cost (CL, MAX (MP, P'), P)
+
+     That is, if we have already exceeded the original maximum pressure MP,
+     and if X might reduce the maximum pressure again -- or at least push
+     it further back, and thus allow more scheduling freedom -- it is given
+     a negative cost to reflect the improvement.
+
+   * otherwise,
+
+       baseECC (CL, X) = 0
+
+     In this case, X is not expected to affect the maximum pressure MP',
+     so it has zero cost.
+
+   We then create a combined value baseECC (X) that is the sum of
+   baseECC (CL, X) for each cover class CL.
+
+   baseECC (X) could itself be used as the ECC value described above.
+   However, this is often too conservative, in the sense that it
+   tends to make high-priority instructions that increase pressure
+   wait too long in cases where introducing a spill would be better.
+   For this reason the final ECC is a priority-adjusted form of
+   baseECC (X).  Specifically, we calculate:
+
+     P (X) = INSN_PRIORITY (X) - insn_delay (X) - baseECC (X)
+     baseP = MAX { P (X) | baseECC (X) <= 0 }
+
+   Then:
+
+     ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0)
+
+   Thus an instruction's effect on pressure is ignored if it has a high
+   enough priority relative to the ones that don't increase pressure.
+   Negative values of baseECC (X) do not increase the priority of X
+   itself, but they do make it harder for other instructions to
+   increase the pressure further.
+
+   This pressure cost is deliberately timid.  The intention has been
+   to choose a heuristic that rarely interferes with the normal list
+   scheduler in cases where that scheduler would produce good code.
+   We simply want to curb some of its worst excesses.  */
+
+/* Return the cost of increasing the pressure in class CL from FROM to TO.
+
+   Here we use the very simplistic cost model that every register above
+   ira_available_class_regs[CL] has a spill cost of 1.  We could use other
+   measures instead, such as one based on MEMORY_MOVE_COST.  However:
+
+      (1) In order for an instruction to be scheduled, the higher cost
+	  would need to be justified in a single saving of that many stalls.
+	  This is overly pessimistic, because the benefit of spilling is
+	  often to avoid a sequence of several short stalls rather than
+	  a single long one.
+
+      (2) The cost is still arbitrary.  Because we are not allocating
+	  registers during scheduling, we have no way of knowing for
+	  sure how many memory accesses will be required by each spill,
+	  where the spills will be placed within the block, or even
+	  which block(s) will contain the spills.
+
+   So a higher cost than 1 is often too conservative in practice,
+   forcing blocks to contain unnecessary stalls instead of spill code.
+   The simple cost below seems to be the best compromise.  It reduces
+   the interference with the normal list scheduler, which helps make
+   it more suitable for a default-on option.  */
+
+static int
+model_spill_cost (int cl, int from, int to)
+{
+  from = MAX (from, ira_available_class_regs[cl]);
+  return MAX (to, from) - from;
+}
+
+/* Return baseECC (ira_reg_class_cover[CCI], POINT), given that
+   P = curr_reg_pressure[ira_reg_class_cover[CCI]] and that
+   P' = P + DELTA.  */
+
+static int
+model_excess_group_cost (struct model_pressure_group *group,
+			 int point, int cci, int delta)
+{
+  int pressure, cl;
+
+  cl = ira_reg_class_cover[cci];
+  if (delta < 0 && point >= group->limits[cci].point)
+    {
+      pressure = MAX (group->limits[cci].orig_pressure,
+		      curr_reg_pressure[cl] + delta);
+      return -model_spill_cost (cl, pressure, curr_reg_pressure[cl]);
+    }
+
+  if (delta > 0)
+    {
+      if (point > group->limits[cci].point)
+	pressure = group->limits[cci].pressure + delta;
+      else
+	pressure = curr_reg_pressure[cl] + delta;
+
+      if (pressure > group->limits[cci].pressure)
+	return model_spill_cost (cl, group->limits[cci].orig_pressure,
+				 pressure);
+    }
+
+  return 0;
+}
+
+/* Return baseECC (MODEL_INSN (INSN)).  Dump the costs to sched_dump
+   if PRINT_P.  */
+
+static int
+model_excess_cost (rtx insn, bool print_p)
+{
+  int point, cci, cl, cost, this_cost, delta;
+  struct reg_pressure_data *insn_reg_pressure;
+  int insn_death[N_REG_CLASSES];
+
+  calculate_reg_deaths (insn, insn_death);
+  point = model_index (insn);
+  insn_reg_pressure = INSN_REG_PRESSURE (insn);
+  cost = 0;
+
+  if (print_p)
+    fprintf (sched_dump, ";;\t\t| %3d %4d | %4d %+3d |", point,
+	     INSN_UID (insn), INSN_PRIORITY (insn), insn_delay (insn));
+
+  /* Sum up the individual costs for each register class.  */
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      delta = insn_reg_pressure[cci].set_increase - insn_death[cl];
+      this_cost = model_excess_group_cost (&model_before_pressure,
+					   point, cci, delta);
+      cost += this_cost;
+      if (print_p)
+	fprintf (sched_dump, " %s:[%d base cost %d]",
+		 reg_class_names[cl], delta, this_cost);
+    }
+
+  if (print_p)
+    fprintf (sched_dump, "\n");
+
+  return cost;
+}
+
+/* Dump the next points of maximum pressure for GROUP.  */
+
+static void
+model_dump_pressure_points (struct model_pressure_group *group)
+{
+  int cci, cl;
+
+  fprintf (sched_dump, ";;\t\t|  pressure points");
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      fprintf (sched_dump, " %s:[%d->%d at ", reg_class_names[cl],
+	       curr_reg_pressure[cl], group->limits[cci].pressure);
+      if (group->limits[cci].point < model_num_insns)
+	fprintf (sched_dump, "%d:%d]", group->limits[cci].point,
+		 INSN_UID (MODEL_INSN (group->limits[cci].point)));
+      else
+	fprintf (sched_dump, "end]");
+    }
+  fprintf (sched_dump, "\n");
+}
+
+/* Set INSN_REG_PRESSURE_EXCESS_COST_CHANGE for INSNS[0...COUNT-1].  */
+
+static void
+model_set_excess_costs (rtx *insns, int count)
+{
+  int i, cost, priority_base, priority;
+  bool print_p;
+
+  /* Record the baseECC value for each instruction in the model schedule,
+     except that negative costs are converted to zero ones now rather thatn
+     later.  Do not assign a cost to debug instructions, since they must
+     not change code-generation decisions.  Experiments suggest we also
+     get better results by not assigning a cost to instructions from
+     a different block.
+
+     Set PRIORITY_BASE to baseP in the block comment above.  This is the
+     maximum priority of the "cheap" instructions, which should always
+     include the next model instruction.  */
+  priority_base = 0;
+  print_p = false;
+  for (i = 0; i < count; i++)
+    if (INSN_MODEL_INDEX (insns[i]))
+      {
+	if (sched_verbose >= 6 && !print_p)
+	  {
+	    fprintf (sched_dump, MODEL_BAR);
+	    fprintf (sched_dump, ";;\t\t| Pressure costs for ready queue\n");
+	    model_dump_pressure_points (&model_before_pressure);
+	    fprintf (sched_dump, MODEL_BAR);
+	    print_p = true;
+	  }
+	cost = model_excess_cost (insns[i], print_p);
+	if (cost <= 0)
+	  {
+	    priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]) - cost;
+	    priority_base = MAX (priority_base, priority);
+	    cost = 0;
+	  }
+	INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = cost;
+      }
+  if (print_p)
+    fprintf (sched_dump, MODEL_BAR);
+
+  /* Use MAX (baseECC, 0) and baseP to calculcate ECC for each
+     instruction.  */
+  for (i = 0; i < count; i++)
+    {
+      cost = INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]);
+      priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]);
+      if (cost > 0 && priority > priority_base)
+	{
+	  cost += priority_base - priority;
+	  INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = MAX (cost, 0);
+	}
+    }
+}
+
 /* Returns a positive value if x is preferred; returns a negative value if
    y is preferred.  Should never return 0, since that will make the sort
    unstable.  */
@@ -1170,23 +1948,20 @@
   /* Make sure that priority of TMP and TMP2 are initialized.  */
   gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
 
-  if (sched_pressure_p)
+  if (sched_pressure != SCHED_PRESSURE_NONE)
     {
       int diff;
 
       /* Prefer insn whose scheduling results in the smallest register
 	 pressure excess.  */
       if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp)
-		   + (INSN_TICK (tmp) > clock_var
-		      ? INSN_TICK (tmp) - clock_var : 0)
+		   + insn_delay (tmp)
 		   - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2)
-		   - (INSN_TICK (tmp2) > clock_var
-		      ? INSN_TICK (tmp2) - clock_var : 0))) != 0)
+		   - insn_delay (tmp2))))
 	return diff;
     }
 
-
-  if (sched_pressure_p
+  if (sched_pressure != SCHED_PRESSURE_NONE
       && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var))
     {
       if (INSN_TICK (tmp) <= clock_var)
@@ -1277,11 +2052,22 @@
 	return val;
     }
 
+  /* Prefer instructions that occur earlier in the model schedule.  */
+  if (sched_pressure == SCHED_PRESSURE_MODEL)
+    {
+      int diff;
+
+      diff = model_index (tmp) - model_index (tmp2);
+      if (diff != 0)
+	return diff;
+    }
+
   /* Prefer the insn which has more later insns that depend on it.
      This gives the scheduler more freedom when scheduling later
      instructions at the expense of added register pressure.  */
 
-  val = (dep_list_size (tmp2) - dep_list_size (tmp));
+  val = (dep_list_size (tmp2, SD_LIST_FORW)
+	 - dep_list_size (tmp, SD_LIST_FORW));
 
   if (flag_sched_dep_count_heuristic && val != 0)
     return val;
@@ -1480,12 +2266,15 @@
   int i;
   rtx *first = ready_lastpos (ready);
 
-  if (sched_pressure_p)
+  if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
     {
       for (i = 0; i < ready->n_ready; i++)
 	if (!DEBUG_INSN_P (first[i]))
 	  setup_insn_reg_pressure_info (first[i]);
     }
+  if (sched_pressure == SCHED_PRESSURE_MODEL
+      && model_curr_point < model_num_insns)
+    model_set_excess_costs (first, ready->n_ready);
   SCHED_SORT (first, ready->n_ready);
 }
 
@@ -1551,10 +2340,12 @@
   gcc_checking_assert (!DEBUG_INSN_P (insn));
 
   for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
-    if (dying_use_p (use) && bitmap_bit_p (curr_reg_live, use->regno))
-      mark_regno_birth_or_death (use->regno, false);
+    if (dying_use_p (use))
+      mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
+				 use->regno, false);
   for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set)
-    mark_regno_birth_or_death (set->regno, true);
+    mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
+			       set->regno, true);
 }
 
 /* Set up or update (if UPDATE_P) max register pressure (see its
@@ -1626,11 +2417,618 @@
 void
 sched_setup_bb_reg_pressure_info (basic_block bb, rtx after)
 {
-  gcc_assert (sched_pressure_p);
+  gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED);
   initiate_bb_reg_pressure_info (bb);
   setup_insn_max_reg_pressure (after, false);
 }
+
+/* Return (in order):
+
+   - positive if INSN adversely affects the pressure on one
+     register class
+
+   - negative if INSN reduces the pressure on one register class
+
+   - 0 if INSN doesn't affect the pressure on any register class.  */
+
+static int
+model_classify_pressure (struct model_insn_info *insn)
+{
+  struct reg_pressure_data *reg_pressure;
+  int death[N_REG_CLASSES];
+  int cci, cl, sum;
+
+  calculate_reg_deaths (insn->insn, death);
+  reg_pressure = INSN_REG_PRESSURE (insn->insn);
+  sum = 0;
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      if (death[cl] < reg_pressure[cci].set_increase)
+	return 1;
+      sum += reg_pressure[cci].set_increase - death[cl];
+    }
+  return sum;
+}
+
+/* Return true if INSN1 should come before INSN2 in the model schedule.  */
+
+static int
+model_order_p (struct model_insn_info *insn1, struct model_insn_info *insn2)
+{
+  unsigned int height1, height2;
+  unsigned int priority1, priority2;
+
+  /* Prefer instructions with a higher model priority.  */
+  if (insn1->model_priority != insn2->model_priority)
+    return insn1->model_priority > insn2->model_priority;
+
+  /* Combine the length of the longest path of satisfied true dependencies
+     that leads to each instruction (depth) with the length of the longest
+     path of any dependencies that leads from the instruction (alap).
+     Prefer instructions with the greatest combined length.  If the combined
+     lengths are equal, prefer instructions with the greatest depth.
+
+     The idea is that, if we have a set S of "equal" instructions that each
+     have ALAP value X, and we pick one such instruction I, any true-dependent
+     successors of I that have ALAP value X - 1 should be preferred over S.
+     This encourages the schedule to be "narrow" rather than "wide".
+     However, if I is a low-priority instruction that we decided to
+     schedule because of its model_classify_pressure, and if there
+     is a set of higher-priority instructions T, the aforementioned
+     successors of I should not have the edge over T.  */
+  height1 = insn1->depth + insn1->alap;
+  height2 = insn2->depth + insn2->alap;
+  if (height1 != height2)
+    return height1 > height2;
+  if (insn1->depth != insn2->depth)
+    return insn1->depth > insn2->depth;
+
+  /* We have no real preference between INSN1 an INSN2 as far as attempts
+     to reduce pressure go.  Prefer instructions with higher priorities.  */
+  priority1 = INSN_PRIORITY (insn1->insn);
+  priority2 = INSN_PRIORITY (insn2->insn);
+  if (priority1 != priority2)
+    return priority1 > priority2;
+
+  /* Use the original rtl sequence as a tie-breaker.  */
+  return insn1 < insn2;
+}
+
+/* Add INSN to the model worklist immediately after PREV.  Add it to the
+   beginning of the list if PREV is null.  */
+
+static void
+model_add_to_worklist_at (struct model_insn_info *insn,
+			  struct model_insn_info *prev)
+{
+  gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_NOWHERE);
+  QUEUE_INDEX (insn->insn) = QUEUE_READY;
+
+  insn->prev = prev;
+  if (prev)
+    {
+      insn->next = prev->next;
+      prev->next = insn;
+    }
+  else
+    {
+      insn->next = model_worklist;
+      model_worklist = insn;
+    }
+  if (insn->next)
+    insn->next->prev = insn;
+}
+
+/* Remove INSN from the model worklist.  */
+
+static void
+model_remove_from_worklist (struct model_insn_info *insn)
+{
+  gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_READY);
+  QUEUE_INDEX (insn->insn) = QUEUE_NOWHERE;
+
+  if (insn->prev)
+    insn->prev->next = insn->next;
+  else
+    model_worklist = insn->next;
+  if (insn->next)
+    insn->next->prev = insn->prev;
+}
+
+/* Add INSN to the model worklist.  Start looking for a suitable position
+   between neighbors PREV and NEXT, testing at most MAX_SCHED_READY_INSNS
+   insns either side.  A null PREV indicates the beginning of the list and
+   a null NEXT indicates the end.  */
+
+static void
+model_add_to_worklist (struct model_insn_info *insn,
+		       struct model_insn_info *prev,
+		       struct model_insn_info *next)
+{
+  int count;
+
+  count = MAX_SCHED_READY_INSNS;
+  if (count > 0 && prev && model_order_p (insn, prev))
+    do
+      {
+	count--;
+	prev = prev->prev;
+      }
+    while (count > 0 && prev && model_order_p (insn, prev));
+  else
+    while (count > 0 && next && model_order_p (next, insn))
+      {
+	count--;
+	prev = next;
+	next = next->next;
+      }
+  model_add_to_worklist_at (insn, prev);
+}
+
+/* INSN may now have a higher priority (in the model_order_p sense)
+   than before.  Move it up the worklist if necessary.  */
+
+static void
+model_promote_insn (struct model_insn_info *insn)
+{
+  struct model_insn_info *prev;
+  int count;
+
+  prev = insn->prev;
+  count = MAX_SCHED_READY_INSNS;
+  while (count > 0 && prev && model_order_p (insn, prev))
+    {
+      count--;
+      prev = prev->prev;
+    }
+  if (prev != insn->prev)
+    {
+      model_remove_from_worklist (insn);
+      model_add_to_worklist_at (insn, prev);
+    }
+}
+
+/* Add INSN to the end of the model schedule.  */
+
+static void
+model_add_to_schedule (rtx insn)
+{
+  unsigned int point;
+
+  gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE);
+  QUEUE_INDEX (insn) = QUEUE_SCHEDULED;
+
+  point = VEC_length (rtx, model_schedule);
+  VEC_quick_push (rtx, model_schedule, insn);
+  INSN_MODEL_INDEX (insn) = point + 1;
+}
+
+/* Analyze the instructions that are to be scheduled, setting up
+   MODEL_INSN_INFO (...) and model_num_insns accordingly.  Add ready
+   instructions to model_worklist.  */
+
+static void
+model_analyze_insns (void)
+{
+  rtx start, end, iter;
+  sd_iterator_def sd_it;
+  dep_t dep;
+  struct model_insn_info *insn, *con;
+
+  model_num_insns = 0;
+  start = PREV_INSN (current_sched_info->next_tail);
+  end = current_sched_info->prev_head;
+  for (iter = start; iter != end; iter = PREV_INSN (iter))
+    if (NONDEBUG_INSN_P (iter))
+      {
+	insn = MODEL_INSN_INFO (iter);
+	insn->insn = iter;
+	FOR_EACH_DEP (iter, SD_LIST_FORW, sd_it, dep)
+	  {
+	    con = MODEL_INSN_INFO (DEP_CON (dep));
+	    if (con->insn && insn->alap < con->alap + 1)
+	      insn->alap = con->alap + 1;
+	  }
+
+	insn->old_queue = QUEUE_INDEX (iter);
+	QUEUE_INDEX (iter) = QUEUE_NOWHERE;
+
+	insn->unscheduled_preds = dep_list_size (iter, SD_LIST_HARD_BACK);
+	if (insn->unscheduled_preds == 0)
+	  model_add_to_worklist (insn, NULL, model_worklist);
+
+	model_num_insns++;
+      }
+}
+
+/* The global state describes the register pressure at the start of the
+   model schedule.  Initialize GROUP accordingly.  */
+
+static void
+model_init_pressure_group (struct model_pressure_group *group)
+{
+  int cci, cl;
+
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      group->limits[cci].pressure = curr_reg_pressure[cl];
+      group->limits[cci].point = 0;
+    }
+  /* Use index model_num_insns to record the state after the last
+     instruction in the model schedule.  */
+  group->model = XNEWVEC (struct model_pressure_data,
+			  (model_num_insns + 1) * ira_reg_class_cover_size);
+}
+
+/* Record that MODEL_REF_PRESSURE (GROUP, POINT, CCI) is PRESSURE.
+   Update the maximum pressure for the whole schedule.  */
+
+static void
+model_record_pressure (struct model_pressure_group *group,
+		       int point, int cci, int pressure)
+{
+  MODEL_REF_PRESSURE (group, point, cci) = pressure;
+  if (group->limits[cci].pressure < pressure)
+    {
+      group->limits[cci].pressure = pressure;
+      group->limits[cci].point = point;
+    }
+}
+
+/* INSN has just been added to the end of the model schedule.  Record its
+   register-pressure information.  */
+
+static void
+model_record_pressures (struct model_insn_info *insn)
+{
+  struct reg_pressure_data *reg_pressure;
+  int point, cci, cl, delta;
+  int death[N_REG_CLASSES];
+
+  point = model_index (insn->insn);
+  if (sched_verbose >= 2)
+    {
+      char buf[2048];
+
+      if (point == 0)
+	{
+	  fprintf (sched_dump, "\n;;\tModel schedule:\n;;\n");
+	  fprintf (sched_dump, ";;\t| idx insn | mpri hght dpth prio |\n");
+	}
+      print_pattern (buf, PATTERN (insn->insn), 0);
+      fprintf (sched_dump, ";;\t| %3d %4d | %4d %4d %4d %4d | %-30s ",
+	       point, INSN_UID (insn->insn), insn->model_priority,
+	       insn->depth + insn->alap, insn->depth,
+	       INSN_PRIORITY (insn->insn), buf);
+    }
+  calculate_reg_deaths (insn->insn, death);
+  reg_pressure = INSN_REG_PRESSURE (insn->insn);
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      delta = reg_pressure[cci].set_increase - death[cl];
+      if (sched_verbose >= 2)
+	fprintf (sched_dump, " %s:[%d,%+d]", reg_class_names[cl],
+		 curr_reg_pressure[cl], delta);
+      model_record_pressure (&model_before_pressure, point, cci,
+			     curr_reg_pressure[cl]);
+    }
+  if (sched_verbose >= 2)
+    fprintf (sched_dump, "\n");
+}
+
+/* All instructions have been added to the model schedule.  Record the
+   final register pressure in GROUP and set up all MODEL_MAX_PRESSUREs.  */
 
+static void
+model_record_final_pressures (struct model_pressure_group *group)
+{
+  int point, cci, max_pressure, ref_pressure, cl;
+
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      /* Record the final pressure for this class.  */
+      cl = ira_reg_class_cover[cci];
+      point = model_num_insns;
+      ref_pressure = curr_reg_pressure[cl];
+      model_record_pressure (group, point, cci, ref_pressure);
+
+      /* Record the original maximum pressure.  */
+      group->limits[cci].orig_pressure = group->limits[cci].pressure;
+
+      /* Update the MODEL_MAX_PRESSURE for every point of the schedule.  */
+      max_pressure = ref_pressure;
+      MODEL_MAX_PRESSURE (group, point, cci) = max_pressure;
+      while (point > 0)
+	{
+	  point--;
+	  ref_pressure = MODEL_REF_PRESSURE (group, point, cci);
+	  max_pressure = MAX (max_pressure, ref_pressure);
+	  MODEL_MAX_PRESSURE (group, point, cci) = max_pressure;
+	}
+    }
+}
+
+/* Update all successors of INSN, given that INSN has just been scheduled.  */
+
+static void
+model_add_successors_to_worklist (struct model_insn_info *insn)
+{
+  sd_iterator_def sd_it;
+  struct model_insn_info *con;
+  dep_t dep;
+
+  FOR_EACH_DEP (insn->insn, SD_LIST_FORW, sd_it, dep)
+    {
+      con = MODEL_INSN_INFO (DEP_CON (dep));
+      /* Ignore debug instructions, and instructions from other blocks.  */
+      if (con->insn)
+	{
+	  con->unscheduled_preds--;
+
+	  /* Update the depth field of each true-dependent successor.
+	     Increasing the depth gives them a higher priority than
+	     before.  */
+	  if (DEP_TYPE (dep) == REG_DEP_TRUE && con->depth < insn->depth + 1)
+	    {
+	      con->depth = insn->depth + 1;
+	      if (QUEUE_INDEX (con->insn) == QUEUE_READY)
+		model_promote_insn (con);
+	    }
+
+	  /* If this is a true dependency, or if there are no remaining
+	     dependencies for CON (meaning that CON only had non-true
+	     dependencies), make sure that CON is on the worklist.
+	     We don't bother otherwise because it would tend to fill the
+	     worklist with a lot of low-priority instructions that are not
+	     yet ready to issue.  */
+	  if ((con->depth > 0 || con->unscheduled_preds == 0)
+	      && QUEUE_INDEX (con->insn) == QUEUE_NOWHERE)
+	    model_add_to_worklist (con, insn, insn->next);
+	}
+    }
+}
+
+/* Give INSN a higher priority than any current instruction, then give
+   unscheduled predecessors of INSN a higher priority still.  If any of
+   those predecessors are not on the model worklist, do the same for its
+   predecessors, and so on.  */
+
+static void
+model_promote_predecessors (struct model_insn_info *insn)
+{
+  struct model_insn_info *pro, *first;
+  sd_iterator_def sd_it;
+  dep_t dep;
+
+  if (sched_verbose >= 7)
+    fprintf (sched_dump, ";;\t+--- priority of %d = %d, priority of",
+	     INSN_UID (insn->insn), model_next_priority);
+  insn->model_priority = model_next_priority++;
+  model_remove_from_worklist (insn);
+  model_add_to_worklist_at (insn, NULL);
+
+  first = NULL;
+  for (;;)
+    {
+      FOR_EACH_DEP (insn->insn, SD_LIST_HARD_BACK, sd_it, dep)
+	{
+	  pro = MODEL_INSN_INFO (DEP_PRO (dep));
+	  /* The first test is to ignore debug instructions, and instructions
+	     from other blocks.  */
+	  if (pro->insn
+	      && pro->model_priority != model_next_priority
+	      && QUEUE_INDEX (pro->insn) != QUEUE_SCHEDULED)
+	    {
+	      pro->model_priority = model_next_priority;
+	      if (sched_verbose >= 7)
+		fprintf (sched_dump, " %d", INSN_UID (pro->insn));
+	      if (QUEUE_INDEX (pro->insn) == QUEUE_READY)
+		{
+		  /* PRO is already in the worklist, but it now has
+		     a higher priority than before.  Move it at the
+		     appropriate place.  */
+		  model_remove_from_worklist (pro);
+		  model_add_to_worklist (pro, NULL, model_worklist);
+		}
+	      else
+		{
+		  /* PRO isn't in the worklist.  Recursively process
+		     its predecessors until we find one that is.  */
+		  pro->next = first;
+		  first = pro;
+		}
+	    }
+	}
+      if (!first)
+	break;
+      insn = first;
+      first = insn->next;
+    }
+  if (sched_verbose >= 7)
+    fprintf (sched_dump, " = %d\n", model_next_priority);
+  model_next_priority++;
+}
+
+/* Pick one instruction from model_worklist and process it.  */
+
+static void
+model_choose_insn (void)
+{
+  struct model_insn_info *insn, *fallback;
+  int count;
+
+  if (sched_verbose >= 7)
+    {
+      fprintf (sched_dump, ";;\t+--- worklist:\n");
+      insn = model_worklist;
+      count = MAX_SCHED_READY_INSNS;
+      while (count > 0 && insn)
+	{
+	  fprintf (sched_dump, ";;\t+---   %d [%d, %d, %d, %d]\n",
+		   INSN_UID (insn->insn), insn->model_priority,
+		   insn->depth + insn->alap, insn->depth,
+		   INSN_PRIORITY (insn->insn));
+	  count--;
+	  insn = insn->next;
+	}
+    }
+
+  /* Look for a ready instruction whose model_classify_priority is zero
+     or negative, picking the highest-priority one.  Adding such an
+     instruction to the schedule now should do no harm, and may actually
+     do some good.
+
+     Failing that, see whether there is an instruction with the highest
+     extant model_priority that is not yet ready, but which would reduce
+     pressure if it became ready.  This is designed to catch cases like:
+
+       (set (mem (reg R1)) (reg R2))
+
+     where the instruction is the last remaining use of R1 and where the
+     value of R2 is not yet available (or vice versa).  The death of R1
+     means that this instruction already reduces pressure.  It is of
+     course possible that the computation of R2 involves other registers
+     that are hard to kill, but such cases are rare enough for this
+     heuristic to be a win in general.
+
+     Failing that, just pick the highest-priority instruction in the
+     worklist.  */
+  count = MAX_SCHED_READY_INSNS;
+  insn = model_worklist;
+  fallback = 0;
+  for (;;)
+    {
+      if (count == 0 || !insn)
+	{
+	  insn = fallback ? fallback : model_worklist;
+	  break;
+	}
+      if (insn->unscheduled_preds)
+	{
+	  if (model_worklist->model_priority == insn->model_priority
+	      && !fallback
+	      && model_classify_pressure (insn) < 0)
+	    fallback = insn;
+	}
+      else
+	{
+	  if (model_classify_pressure (insn) <= 0)
+	    break;
+	}
+      count--;
+      insn = insn->next;
+    }
+
+  if (sched_verbose >= 7 && insn != model_worklist)
+    {
+      if (insn->unscheduled_preds)
+	fprintf (sched_dump, ";;\t+--- promoting insn %d, with dependencies\n",
+		 INSN_UID (insn->insn));
+      else
+	fprintf (sched_dump, ";;\t+--- promoting insn %d, which is ready\n",
+		 INSN_UID (insn->insn));
+    }
+  if (insn->unscheduled_preds)
+    /* INSN isn't yet ready to issue.  Give all its predecessors the
+       highest priority.  */
+    model_promote_predecessors (insn);
+  else
+    {
+      /* INSN is ready.  Add it to the end of model_schedule and
+	 process its successors.  */
+      model_add_successors_to_worklist (insn);
+      model_remove_from_worklist (insn);
+      model_add_to_schedule (insn->insn);
+      model_record_pressures (insn);
+      update_register_pressure (insn->insn);
+    }
+}
+
+/* Restore all QUEUE_INDEXs to the values that they had before
+   model_start_schedule was called.  */
+
+static void
+model_reset_queue_indices (void)
+{
+  unsigned int i;
+  rtx insn;
+
+  FOR_EACH_VEC_ELT (rtx, model_schedule, i, insn)
+    QUEUE_INDEX (insn) = MODEL_INSN_INFO (insn)->old_queue;
+}
+
+/* We have calculated the model schedule and spill costs.  Print a summary
+   to sched_dump.  */
+
+static void
+model_dump_pressure_summary (void)
+{
+  int cci, cl;
+
+  fprintf (sched_dump, ";; Pressure summary:");
+  for (cci = 0; cci < ira_reg_class_cover_size; cci++)
+    {
+      cl = ira_reg_class_cover[cci];
+      fprintf (sched_dump, " %s:%d", reg_class_names[cl],
+	       model_before_pressure.limits[cci].pressure);
+    }
+  fprintf (sched_dump, "\n\n");
+}
+
+/* Initialize the SCHED_PRESSURE_MODEL information for the current
+   scheduling region.  */
+
+static void
+model_start_schedule (void)
+{
+  basic_block bb;
+
+  model_next_priority = 1;
+  model_schedule = VEC_alloc (rtx, heap, sched_max_luid);
+  model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid);
+
+  bb = BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head));
+  initiate_reg_pressure_info (df_get_live_in (bb));
+
+  model_analyze_insns ();
+  model_init_pressure_group (&model_before_pressure);
+  while (model_worklist)
+    model_choose_insn ();
+  gcc_assert (model_num_insns == (int) VEC_length (rtx, model_schedule));
+  if (sched_verbose >= 2)
+    fprintf (sched_dump, "\n");
+
+  model_record_final_pressures (&model_before_pressure);
+  model_reset_queue_indices ();
+
+  XDELETEVEC (model_insns);
+
+  model_curr_point = 0;
+  initiate_reg_pressure_info (df_get_live_in (bb));
+  if (sched_verbose >= 1)
+    model_dump_pressure_summary ();
+}
+
+/* Free the information associated with GROUP.  */
+
+static void
+model_finalize_pressure_group (struct model_pressure_group *group)
+{
+  XDELETEVEC (group->model);
+}
+
+/* Free the information created by model_start_schedule.  */
+
+static void
+model_end_schedule (void)
+{
+  model_finalize_pressure_group (&model_before_pressure);
+  VEC_free (rtx, heap, model_schedule);
+}
+
 /* INSN is the "currently executing insn".  Launch each insn which was
    waiting on INSN.  READY is the ready list which contains the insns
    that are ready to fire.  CLOCK is the current cycle.  The function
@@ -1667,10 +3065,14 @@
 		     reg_class_names[ira_reg_class_cover[i]],
 		     pressure_info[i].set_increase, pressure_info[i].change);
 	}
+      if (sched_pressure == SCHED_PRESSURE_MODEL
+	  && model_curr_point < model_num_insns
+	  && model_index (insn) == model_curr_point)
+	fprintf (sched_dump, ":model %d", model_curr_point);
       fputc ('\n', sched_dump);
     }
 
-  if (sched_pressure_p && !DEBUG_INSN_P (insn))
+  if (sched_pressure == SCHED_PRESSURE_WEIGHTED && !DEBUG_INSN_P (insn))
     update_reg_and_insn_max_reg_pressure (insn);
 
   /* Scheduling instruction should have all its dependencies resolved and
@@ -1728,6 +3130,24 @@
   gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE);
   QUEUE_INDEX (insn) = QUEUE_SCHEDULED;
 
+  if (sched_pressure == SCHED_PRESSURE_MODEL
+      && model_curr_point < model_num_insns
+      && NONDEBUG_INSN_P (insn))
+    {
+      if (model_index (insn) == model_curr_point)
+	do
+	  model_curr_point++;
+	while (model_curr_point < model_num_insns
+	       && (QUEUE_INDEX (MODEL_INSN (model_curr_point))
+		   == QUEUE_SCHEDULED));
+      else
+	model_recompute (insn);
+      model_update_limit_points ();
+      update_register_pressure (insn);
+      if (sched_verbose >= 2)
+	print_curr_reg_pressure ();
+    }
+
   gcc_assert (INSN_TICK (insn) >= MIN_TICK);
   if (INSN_TICK (insn) > clock_var)
     /* INSN has been prematurely moved from the queue to the ready list.
@@ -2056,7 +3476,16 @@
       /* If the ready list is full, delay the insn for 1 cycle.
 	 See the comment in schedule_block for the rationale.  */
       if (!reload_completed
-	  && ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS
+	  && (ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS
+	      || (sched_pressure == SCHED_PRESSURE_MODEL
+		  /* Limit pressure recalculations to MAX_SCHED_READY_INSNS
+		     instructions too.  */
+		  && model_index (insn) > (model_curr_point
+					   + MAX_SCHED_READY_INSNS)))
+	  && !(sched_pressure == SCHED_PRESSURE_MODEL
+	       && model_curr_point < model_num_insns
+	       /* Always allow the next model instruction to issue.  */
+	       && model_index (insn) == model_curr_point)
 	  && !SCHED_GROUP_P (insn)
 	  && insn != skip_insn)
 	{
@@ -2293,12 +3722,12 @@
       fprintf (sched_dump, "  %s:%d",
 	       (*current_sched_info->print_insn) (p[i], 0),
 	       INSN_LUID (p[i]));
-      if (sched_pressure_p)
+      if (sched_pressure != SCHED_PRESSURE_NONE)
 	fprintf (sched_dump, "(cost=%d",
 		 INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i]));
       if (INSN_TICK (p[i]) > clock_var)
 	fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var);
-      if (sched_pressure_p)
+      if (sched_pressure != SCHED_PRESSURE_NONE)
 	fprintf (sched_dump, ")");
     }
   fprintf (sched_dump, "\n");
@@ -2609,8 +4038,8 @@
 	    {
 	      if (state_dead_lock_p (state)
 		  || insn_finishes_cycle_p (insn))
- 		/* We won't issue any more instructions in the next
- 		   choice_state.  */
+		/* We won't issue any more instructions in the next
+		   choice_state.  */
 		top->rest = 0;
 	      else
 		top->rest--;
@@ -2813,6 +4242,59 @@
     }
 }
 
+/* Examine all insns on the ready list and queue those which can't be
+   issued in this cycle.  TEMP_STATE is temporary scheduler state we
+   can use as scratch space.  If FIRST_CYCLE_INSN_P is true, no insns
+   have been issued for the current cycle, which means it is valid to
+   issue an asm statement.  */
+
+static void
+prune_ready_list (state_t temp_state, bool first_cycle_insn_p)
+{
+  int i;
+
+ restart:
+  for (i = 0; i < ready.n_ready; i++)
+    {
+      rtx insn = ready_element (&ready, i);
+      int cost = 0;
+
+      if (recog_memoized (insn) < 0)
+	{
+	  if (!first_cycle_insn_p
+	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
+		  || asm_noperands (PATTERN (insn)) >= 0))
+	    cost = 1;
+	}
+      else if (sched_pressure != SCHED_PRESSURE_NONE)
+	{
+	  if (sched_pressure == SCHED_PRESSURE_MODEL
+	      && INSN_TICK (insn) <= clock_var)
+	    {
+	      memcpy (temp_state, curr_state, dfa_state_size);
+	      if (state_transition (temp_state, insn) >= 0)
+		INSN_TICK (insn) = clock_var + 1;
+	    }
+	  cost = 0;
+	}
+      else
+	{
+	  memcpy (temp_state, curr_state, dfa_state_size);
+	  cost = state_transition (temp_state, insn);
+	  if (cost < 0)
+	    cost = 0;
+	  else if (cost == 0)
+	    cost = 1;
+	}
+      if (cost >= 1)
+	{
+	  ready_remove (&ready, i);
+	  queue_insn (insn, cost);
+	  goto restart;
+	}
+    }
+}
+
 /* Use forward list scheduling to rearrange insns of block pointed to by
    TARGET_BB, possibly bringing insns from subsequent blocks in the same
    region.  */
@@ -2882,6 +4364,9 @@
      in try_ready () (which is called through init_ready_list ()).  */
   (*current_sched_info->init_ready_list) ();
 
+  if (sched_pressure == SCHED_PRESSURE_MODEL)
+    model_start_schedule ();
+
   /* The algorithm is O(n^2) in the number of ready insns at any given
      time in the worst case.  Before reload we are more likely to have
      big lists so truncate them to a reasonable size.  */
@@ -2963,6 +4448,10 @@
 	}
       while (advance > 0);
 
+      prune_ready_list (temp_state, true);
+      if (ready.n_ready == 0)
+        continue;
+
       if (sort_p)
 	{
 	  /* Sort the ready list based on priority.  */
@@ -3040,7 +4529,7 @@
 	      fprintf (sched_dump, ";;\tReady list (t = %3d):  ",
 		       clock_var);
 	      debug_ready_list (&ready);
-	      if (sched_pressure_p)
+	      if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
 		print_curr_reg_pressure ();
 	    }
 
@@ -3084,7 +4573,8 @@
 	  else
 	    insn = ready_remove_first (&ready);
 
-	  if (sched_pressure_p && INSN_TICK (insn) > clock_var)
+	  if (sched_pressure != SCHED_PRESSURE_NONE
+	      && INSN_TICK (insn) > clock_var)
 	    {
 	      ready_add (&ready, insn, true);
 	      advance = 1;
@@ -3112,44 +4602,6 @@
 	    }
 
 	  sort_p = TRUE;
-	  memcpy (temp_state, curr_state, dfa_state_size);
-	  if (recog_memoized (insn) < 0)
-	    {
-	      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
-		       || asm_noperands (PATTERN (insn)) >= 0);
-	      if (!first_cycle_insn_p && asm_p)
-		/* This is asm insn which is tried to be issued on the
-		   cycle not first.  Issue it on the next cycle.  */
-		cost = 1;
-	      else
-		/* A USE insn, or something else we don't need to
-		   understand.  We can't pass these directly to
-		   state_transition because it will trigger a
-		   fatal error for unrecognizable insns.  */
-		cost = 0;
-	    }
-	  else if (sched_pressure_p)
-	    cost = 0;
-	  else
-	    {
-	      cost = state_transition (temp_state, insn);
-	      if (cost < 0)
-		cost = 0;
-	      else if (cost == 0)
-		cost = 1;
-	    }
-
-	  if (cost >= 1)
-	    {
-	      queue_insn (insn, cost);
- 	      if (SCHED_GROUP_P (insn))
- 		{
- 		  advance = cost;
- 		  break;
- 		}
-
-	      continue;
-	    }
 
 	  if (current_sched_info->can_schedule_ready_p
 	      && ! (*current_sched_info->can_schedule_ready_p) (insn))
@@ -3200,11 +4652,17 @@
 	  reemit_notes (insn);
 	  last_scheduled_insn = insn;
 
-	  if (memcmp (curr_state, temp_state, dfa_state_size) != 0)
-            {
-              cycle_issued_insns++;
-              memcpy (curr_state, temp_state, dfa_state_size);
-            }
+	  if (recog_memoized (insn) >= 0)
+	    {
+	      cost = state_transition (curr_state, insn);
+	      if (sched_pressure != SCHED_PRESSURE_WEIGHTED)
+		gcc_assert (cost < 0);
+	      cycle_issued_insns++;
+	      asm_p = false;
+	    }
+	  else
+	    asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
+		     || asm_noperands (PATTERN (insn)) >= 0);
 
 	  if (targetm.sched.variable_issue)
 	    can_issue_more =
@@ -3225,6 +4683,9 @@
 
 	  first_cycle_insn_p = false;
 
+	  if (ready.n_ready > 0)
+            prune_ready_list (temp_state, false);
+
 	  /* Sort the ready list based on priority.  This must be
 	     redone here, as schedule_insn may have readied additional
 	     insns that will not be sorted correctly.  */
@@ -3321,6 +4782,9 @@
 	  }
     }
 
+  if (sched_pressure == SCHED_PRESSURE_MODEL)
+    model_end_schedule ();
+
   if (sched_verbose)
     fprintf (sched_dump, ";;   total time = %d\n", clock_var);
 
@@ -3424,10 +4888,14 @@
   if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON))
     targetm.sched.dispatch_do (NULL_RTX, DISPATCH_INIT);
 
-  sched_pressure_p = (flag_sched_pressure && ! reload_completed
-		      && common_sched_info->sched_pass_id == SCHED_RGN_PASS);
+  if (flag_sched_pressure
+      && !reload_completed
+      && common_sched_info->sched_pass_id == SCHED_RGN_PASS)
+    sched_pressure = flag_sched_pressure_algorithm;
+  else
+    sched_pressure = SCHED_PRESSURE_NONE;
 
-  if (sched_pressure_p)
+  if (sched_pressure != SCHED_PRESSURE_NONE)
     ira_setup_eliminable_regset ();
 
   /* Initialize SPEC_INFO.  */
@@ -3504,7 +4972,7 @@
   if (targetm.sched.init_global)
     targetm.sched.init_global (sched_dump, sched_verbose, get_max_uid () + 1);
 
-  if (sched_pressure_p)
+  if (sched_pressure != SCHED_PRESSURE_NONE)
     {
       int i, max_regno = max_reg_num ();
 
@@ -3517,8 +4985,11 @@
 	     ? ira_class_translate[REGNO_REG_CLASS (i)]
 	     : reg_cover_class (i));
       curr_reg_live = BITMAP_ALLOC (NULL);
-      saved_reg_live = BITMAP_ALLOC (NULL);
-      region_ref_regs = BITMAP_ALLOC (NULL);
+      if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+	{
+	  saved_reg_live = BITMAP_ALLOC (NULL);
+	  region_ref_regs = BITMAP_ALLOC (NULL);
+	}
     }
 
   curr_state = xmalloc (dfa_state_size);
@@ -3618,12 +5089,15 @@
 sched_finish (void)
 {
   haifa_finish_h_i_d ();
-  if (sched_pressure_p)
+  if (sched_pressure != SCHED_PRESSURE_NONE)
     {
-      free (sched_regno_cover_class);
-      BITMAP_FREE (region_ref_regs);
-      BITMAP_FREE (saved_reg_live);
+      if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+	{
+	  BITMAP_FREE (region_ref_regs);
+	  BITMAP_FREE (saved_reg_live);
+	}
       BITMAP_FREE (curr_reg_live);
+      free (sched_regno_cover_class);
     }
   free (curr_state);
 
@@ -3939,7 +5413,7 @@
   INSN_TICK (next) = tick;
 
   delay = tick - clock_var;
-  if (delay <= 0 || sched_pressure_p)
+  if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE)
     delay = QUEUE_READY;
 
   change_queue_index (next, delay);
@@ -5188,7 +6662,7 @@
       if (insn == jump)
 	break;
 
-      if (dep_list_size (insn) == 0)
+      if (dep_list_size (insn, SD_LIST_FORW) == 0)
 	{
 	  dep_def _new_dep, *new_dep = &_new_dep;
 
@@ -5559,6 +7033,7 @@
 
   FOR_EACH_VEC_ELT (haifa_insn_data_def, h_i_d, i, data)
     {
+      free (data->max_reg_pressure);
       if (data->reg_pressure != NULL)
 	free (data->reg_pressure);
       for (use = data->reg_use_list; use != NULL; use = next)
--- a/src/gcc/hooks.c
+++ b/src/gcc/hooks.c
@@ -101,6 +101,15 @@
   return true;
 }
 
+/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT)
+   and returns false.  */
+bool
+hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED,
+			   unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
 /* Generic hook that takes (FILE *, const char *) and does nothing.  */
 void
 hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED)
--- a/src/gcc/hooks.h
+++ b/src/gcc/hooks.h
@@ -34,6 +34,8 @@
 extern bool hook_bool_mode_true (enum machine_mode);
 extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx);
 extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx);
+extern bool hook_bool_mode_uhwi_false (enum machine_mode,
+				       unsigned HOST_WIDE_INT);
 extern bool hook_bool_tree_false (tree);
 extern bool hook_bool_const_tree_false (const_tree);
 extern bool hook_bool_tree_true (tree);
--- a/src/gcc/ifcvt.c
+++ b/src/gcc/ifcvt.c
@@ -1,5 +1,6 @@
 /* If-conversion support.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010,
+   2011
    Free Software Foundation, Inc.
 
    This file is part of GCC.
@@ -304,6 +305,10 @@
 
   for (insn = start; ; insn = NEXT_INSN (insn))
     {
+      /* dwarf2out can't cope with conditional prologues.  */
+      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+	return FALSE;
+
       if (NOTE_P (insn) || DEBUG_INSN_P (insn))
 	goto insn_done;
 
--- a/src/gcc/internal-fn.c
+++ b/src/gcc/internal-fn.c
@@ -0,0 +1,147 @@
+/* Internal functions.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "gimple.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "recog.h"
+
+/* The names of each internal function, indexed by function number.  */
+const char *const internal_fn_name_array[] = {
+#define DEF_INTERNAL_FN(CODE, FLAGS) #CODE,
+#include "internal-fn.def"
+#undef DEF_INTERNAL_FN
+  "<invalid-fn>"
+};
+
+/* The ECF_* flags of each internal function, indexed by function number.  */
+const int internal_fn_flags_array[] = {
+#define DEF_INTERNAL_FN(CODE, FLAGS) FLAGS,
+#include "internal-fn.def"
+#undef DEF_INTERNAL_FN
+  0
+};
+
+/* ARRAY_TYPE is an array of vector modes.  Return the associated insn
+   for load-lanes-style optab OPTAB.  The insn must exist.  */
+
+static enum insn_code
+get_multi_vector_move (tree array_type, convert_optab optab)
+{
+  enum insn_code icode;
+  enum machine_mode imode;
+  enum machine_mode vmode;
+
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+  imode = TYPE_MODE (array_type);
+  vmode = TYPE_MODE (TREE_TYPE (array_type));
+
+  icode = convert_optab_handler (optab, imode, vmode);
+  gcc_assert (icode != CODE_FOR_nothing);
+  return icode;
+}
+
+/* Expand LOAD_LANES call STMT.  */
+
+static void
+expand_LOAD_LANES (gimple stmt)
+{
+  tree type, lhs, rhs;
+  rtx target, mem;
+  enum insn_code icode;
+  const struct insn_operand_data *operand;
+
+  lhs = gimple_call_lhs (stmt);
+  rhs = gimple_call_arg (stmt, 0);
+  type = TREE_TYPE (lhs);
+
+  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  mem = expand_normal (rhs);
+
+  gcc_assert (REG_P (target));
+  gcc_assert (MEM_P (mem));
+  PUT_MODE (mem, TYPE_MODE (type));
+
+  icode = get_multi_vector_move (type, vec_load_lanes_optab);
+
+  operand = &insn_data[(int) icode].operand[1];
+  if (operand->predicate && !operand->predicate (mem, operand->mode))
+    mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+
+  emit_insn (GEN_FCN (icode) (target, mem));
+}
+
+/* Expand STORE_LANES call STMT.  */
+
+static void
+expand_STORE_LANES (gimple stmt)
+{
+  tree type, lhs, rhs;
+  rtx target, reg;
+  enum insn_code icode;
+  const struct insn_operand_data *operand;
+
+  lhs = gimple_call_lhs (stmt);
+  rhs = gimple_call_arg (stmt, 0);
+  type = TREE_TYPE (rhs);
+
+  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  reg = expand_normal (rhs);
+
+  gcc_assert (MEM_P (target));
+  PUT_MODE (target, TYPE_MODE (type));
+
+  icode = get_multi_vector_move (type, vec_store_lanes_optab);
+
+  operand = &insn_data[(int) icode].operand[0];
+  if (operand->predicate && !operand->predicate (target, operand->mode))
+    target = replace_equiv_address (target,
+				    force_reg (Pmode, XEXP (target, 0)));
+
+  operand = &insn_data[(int) icode].operand[1];
+  if (operand->predicate && !operand->predicate (reg, operand->mode))
+    reg = force_reg (TYPE_MODE (type), reg);
+
+  emit_insn (GEN_FCN (icode) (target, reg));
+}
+
+/* Routines to expand each internal function, indexed by function number.
+   Each routine has the prototype:
+
+       expand_<NAME> (gimple stmt)
+
+   where STMT is the statement that performs the call. */
+static void (*const internal_fn_expanders[]) (gimple) = {
+#define DEF_INTERNAL_FN(CODE, FLAGS) expand_##CODE,
+#include "internal-fn.def"
+#undef DEF_INTERNAL_FN
+  0
+};
+
+/* Expand STMT, which is a call to internal function FN.  */
+
+void
+expand_internal_call (gimple stmt)
+{
+  internal_fn_expanders[(int) gimple_call_internal_fn (stmt)] (stmt);
+}
--- a/src/gcc/internal-fn.def
+++ b/src/gcc/internal-fn.def
@@ -0,0 +1,42 @@
+/* Internal functions.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This file specifies a list of internal "functions".  These functions
+   differ from built-in functions in that they have no linkage and cannot
+   be called directly by the user.  They represent operations that are only
+   synthesised by GCC itself.
+
+   Internal functions are used instead of tree codes if the operation
+   and its operands are more naturally represented as a GIMPLE_CALL
+   than a GIMPLE_ASSIGN.
+
+   Each entry in this file has the form:
+
+     DEF_INTERNAL_FN (NAME, FLAGS)
+
+   where NAME is the name of the function and FLAGS is a set of
+   ECF_* flags.  Each entry must have a corresponding expander
+   of the form:
+
+     void expand_NAME (gimple stmt)
+
+   where STMT is the statement that performs the call.  */
+
+DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF)
+DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF)
--- a/src/gcc/internal-fn.h
+++ b/src/gcc/internal-fn.h
@@ -0,0 +1,52 @@
+/* Internal functions.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_INTERNAL_FN_H
+#define GCC_INTERNAL_FN_H
+
+enum internal_fn {
+#define DEF_INTERNAL_FN(CODE, FLAGS) IFN_##CODE,
+#include "internal-fn.def"
+#undef DEF_INTERNAL_FN
+  IFN_LAST
+};
+
+extern const char *const internal_fn_name_array[];
+extern const int internal_fn_flags_array[];
+
+/* Return the name of internal function FN.  The name is only meaningful
+   for dumps; it has no linkage.  */
+
+static inline const char *
+internal_fn_name (enum internal_fn fn)
+{
+  return internal_fn_name_array[(int) fn];
+}
+
+/* Return the ECF_* flags for function FN.  */
+
+static inline int
+internal_fn_flags (enum internal_fn fn)
+{
+  return internal_fn_flags_array[(int) fn];
+}
+
+extern void expand_internal_call (gimple);
+
+#endif
--- a/src/gcc/ipa-prop.c
+++ b/src/gcc/ipa-prop.c
@@ -1417,6 +1417,8 @@
 {
   tree target = gimple_call_fn (call);
 
+  if (!target)
+    return;
   if (TREE_CODE (target) == SSA_NAME)
     ipa_analyze_indirect_call_uses (node, info, parms_info, call, target);
   else if (TREE_CODE (target) == OBJ_TYPE_REF)
--- a/src/gcc/LINARO-VERSION
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
+4.6-2013.04
--- a/src/gcc/longlong.h
+++ b/src/gcc/longlong.h
@@ -203,7 +203,7 @@
 UDItype __umulsidi3 (USItype, USItype);
 #endif
 
-#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("adds	%1, %4, %5\n\tadc	%0, %2, %3"		\
 	   : "=r" ((USItype) (sh)),					\
@@ -220,9 +220,12 @@
 	     "rI" ((USItype) (bh)),					\
 	     "r" ((USItype) (al)),					\
 	     "rI" ((USItype) (bl)) __CLOBBER_CC)
-#define umul_ppmm(xh, xl, a, b) \
-{register USItype __t0, __t1, __t2;					\
-  __asm__ ("%@ Inlined umul_ppmm\n"					\
+# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
+     || defined(__ARM_ARCH_3__)
+#  define umul_ppmm(xh, xl, a, b)					\
+  do {									\
+    register USItype __t0, __t1, __t2;					\
+    __asm__ ("%@ Inlined umul_ppmm\n"					\
 	   "	mov	%2, %5, lsr #16\n"				\
 	   "	mov	%0, %6, lsr #16\n"				\
 	   "	bic	%3, %5, %2, lsl #16\n"				\
@@ -239,14 +242,26 @@
 	     "=r" ((USItype) (xl)),					\
 	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
 	   : "r" ((USItype) (a)),					\
-	     "r" ((USItype) (b)) __CLOBBER_CC );}
-#define UMUL_TIME 20
-#define UDIV_TIME 100
+	     "r" ((USItype) (b)) __CLOBBER_CC );			\
+  } while (0)
+#  define UMUL_TIME 20
+# else
+#  define umul_ppmm(xh, xl, a, b)					\
+  do {									\
+    /* Generate umull, under compiler control.  */			\
+    register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);	\
+    (xl) = (USItype)__t0;						\
+    (xh) = (USItype)(__t0 >> 32);					\
+  } while (0)
+#  define UMUL_TIME 3
+# endif
+# define UDIV_TIME 100
 #endif /* __arm__ */
 
 #if defined(__arm__)
 /* Let gcc decide how best to implement count_leading_zeros.  */
 #define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
 #define COUNT_LEADING_ZEROS_0 32
 #endif
 
--- a/src/gcc/loop-doloop.c
+++ b/src/gcc/loop-doloop.c
@@ -78,6 +78,8 @@
   rtx inc_src;
   rtx condition;
   rtx pattern;
+  rtx cc_reg = NULL_RTX;
+  rtx reg_orig = NULL_RTX;
 
   /* The canonical doloop pattern we expect has one of the following
      forms:
@@ -96,7 +98,16 @@
      2)  (set (reg) (plus (reg) (const_int -1))
          (set (pc) (if_then_else (reg != 0)
 	                         (label_ref (label))
-			         (pc))).  */
+			         (pc))).  
+
+     Some targets (ARM) do the comparison before the branch, as in the
+     following form:
+
+     3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
+                   (set (reg) (plus (reg) (const_int -1)))])
+        (set (pc) (if_then_else (cc == NE)
+                                (label_ref (label))
+                                (pc))) */
 
   pattern = PATTERN (doloop_pat);
 
@@ -104,19 +115,47 @@
     {
       rtx cond;
       rtx prev_insn = prev_nondebug_insn (doloop_pat);
+      rtx cmp_arg1, cmp_arg2;
+      rtx cmp_orig;
 
-      /* We expect the decrement to immediately precede the branch.  */
+      /* In case the pattern is not PARALLEL we expect two forms
+	 of doloop which are cases 2) and 3) above: in case 2) the
+	 decrement immediately precedes the branch, while in case 3)
+	 the compare and decrement instructions immediately precede
+	 the branch.  */
 
       if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
         return 0;
 
       cmp = pattern;
-      inc = PATTERN (PREV_INSN (doloop_pat));
+      if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
+        {
+	  /* The third case: the compare and decrement instructions
+	     immediately precede the branch.  */
+	  cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
+	  if (GET_CODE (cmp_orig) != SET)
+	    return 0;
+	  if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
+	    return 0;
+	  cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
+          cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
+	  if (cmp_arg2 != const0_rtx 
+	      || GET_CODE (cmp_arg1) != PLUS)
+	    return 0;
+	  reg_orig = XEXP (cmp_arg1, 0);
+	  if (XEXP (cmp_arg1, 1) != GEN_INT (-1) 
+	      || !REG_P (reg_orig))
+	    return 0;
+	  cc_reg = SET_DEST (cmp_orig);
+	  
+	  inc = XVECEXP (PATTERN (prev_insn), 0, 1);
+	}
+      else
+        inc = PATTERN (prev_insn);
       /* We expect the condition to be of the form (reg != 0)  */
       cond = XEXP (SET_SRC (cmp), 0);
       if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
         return 0;
-
     }
   else
     {
@@ -162,11 +201,15 @@
     return 0;
 
   if ((XEXP (condition, 0) == reg)
+      /* For the third case:  */  
+      || ((cc_reg != NULL_RTX)
+	  && (XEXP (condition, 0) == cc_reg)
+	  && (reg_orig == reg))
       || (GET_CODE (XEXP (condition, 0)) == PLUS
-		   && XEXP (XEXP (condition, 0), 0) == reg))
+	  && XEXP (XEXP (condition, 0), 0) == reg))
    {
      if (GET_CODE (pattern) != PARALLEL)
-     /*  The second form we expect:
+     /*  For the second form we expect:
 
          (set (reg) (plus (reg) (const_int -1))
          (set (pc) (if_then_else (reg != 0)
@@ -181,7 +224,24 @@
                      (set (reg) (plus (reg) (const_int -1)))
                      (additional clobbers and uses)])
 
-         So we return that form instead.
+        For the third form we expect:
+
+        (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
+                   (set (reg) (plus (reg) (const_int -1)))])
+        (set (pc) (if_then_else (cc == NE)
+                                (label_ref (label))
+                                (pc))) 
+
+        which is equivalent to the following:
+
+        (parallel [(set (cc) (compare (reg,  1))
+                   (set (reg) (plus (reg) (const_int -1)))
+                   (set (pc) (if_then_else (NE == cc)
+                                           (label_ref (label))
+                                           (pc))))])
+
+        So we return the second form instead for the two cases.
+
      */
         condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
 
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
@@ -903,6 +903,8 @@
 READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h
 PARAMS_H = params.h params.def
 BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def
+INTERNAL_FN_DEF = internal-fn.def
+INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF)
 TREE_H = tree.h all-tree.def tree.def c-family/c-common.def \
 	$(lang_tree_files) $(MACHMODE_H) tree-check.h $(BUILTINS_DEF) \
 	$(INPUT_H) statistics.h $(VEC_H) treestruct.def $(HASHTAB_H) \
@@ -912,7 +914,7 @@
 BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h
 GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \
 	$(GGC_H) $(BASIC_BLOCK_H) $(TARGET_H) tree-ssa-operands.h \
-	tree-ssa-alias.h vecir.h
+	tree-ssa-alias.h vecir.h $(INTERNAL_FN_H)
 GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h
 COVERAGE_H = coverage.h $(GCOV_IO_H)
 DEMANGLE_H = $(srcdir)/../include/demangle.h
@@ -1284,6 +1286,7 @@
 	init-regs.o \
 	input.o \
 	integrate.o \
+	internal-fn.o \
 	intl.o \
 	ira.o \
 	ira-build.o \
@@ -2438,7 +2441,8 @@
 tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) langhooks.h $(FLAGS_H) \
-   $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h
+   $(DIAGNOSTIC_H) $(TIMEVAR_H) pointer-set.h domwalk.h $(CFGLOOP_H) \
+   $(TREE_DATA_REF_H)
 tree-nrv.o : tree-nrv.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(FLAGS_H) \
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TIMEVAR_H) $(TREE_DUMP_H) $(TREE_PASS_H) \
@@ -2679,7 +2683,7 @@
    $(TREE_PASS_H) $(PARAMS_H) gt-tree-scalar-evolution.h
 tree-data-ref.o : tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    gimple-pretty-print.h $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
-   $(TREE_PASS_H) langhooks.h
+   $(TREE_PASS_H) langhooks.h tree-affine.h
 sese.o : sese.c sese.h $(CONFIG_H) $(SYSTEM_H) coretypes.h tree-pretty-print.h \
    $(TREE_FLOW_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) tree-pass.h value-prof.h
 graphite.o : graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(DIAGNOSTIC_CORE_H) \
@@ -2766,6 +2770,8 @@
    $(TM_H) $(TREE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \
    $(TREE_PASS_H) tree-ssa-propagate.h tree-pretty-print.h \
    gimple-pretty-print.h
+internal-fn.o : internal-fn.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(GIMPLE_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H)
 gimple.o : gimple.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TREE_H) \
    $(GGC_H) $(GIMPLE_H) $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_H) gt-gimple.h \
    $(TREE_FLOW_H) value-prof.h $(FLAGS_H) $(DEMANGLE_H) \
--- a/src/gcc/modulo-sched.c
+++ b/src/gcc/modulo-sched.c
@@ -116,14 +116,18 @@
 
 /* The number of different iterations the nodes in ps span, assuming
    the stage boundaries are placed efficiently.  */
-#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
-			     + 1 + (ps)->ii - 1) / (ps)->ii)
+#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
+                         + 1 + ii - 1) / ii)
+/* The stage count of ps.  */
+#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
 
 /* A single instruction in the partial schedule.  */
 struct ps_insn
 {
-  /* The corresponding DDG_NODE.  */
-  ddg_node_ptr node;
+  /* Identifies the instruction to be scheduled.  Values smaller than
+     the ddg's num_nodes refer directly to ddg nodes.  A value of
+     X - num_nodes refers to register move X.  */
+  int id;
 
   /* The (absolute) cycle in which the PS instruction is scheduled.
      Same as SCHED_TIME (node).  */
@@ -133,10 +137,35 @@
   ps_insn_ptr next_in_row,
 	      prev_in_row;
 
-  /* The number of nodes in the same row that come after this node.  */
-  int row_rest_count;
 };
 
+/* Information about a register move that has been added to a partial
+   schedule.  */
+struct ps_reg_move_info
+{
+  /* The source of the move is defined by the ps_insn with id DEF.
+     The destination is used by the ps_insns with the ids in USES.  */
+  int def;
+  sbitmap uses;
+
+  /* The original form of USES' instructions used OLD_REG, but they
+     should now use NEW_REG.  */
+  rtx old_reg;
+  rtx new_reg;
+
+  /* The number of consecutive stages that the move occupies.  */
+  int num_consecutive_stages;
+
+  /* An instruction that sets NEW_REG to the correct value.  The first
+     move associated with DEF will have an rhs of OLD_REG; later moves
+     use the result of the previous move.  */
+  rtx insn;
+};
+
+typedef struct ps_reg_move_info ps_reg_move_info;
+DEF_VEC_O (ps_reg_move_info);
+DEF_VEC_ALLOC_O (ps_reg_move_info, heap);
+
 /* Holds the partial schedule as an array of II rows.  Each entry of the
    array points to a linked list of PS_INSNs, which represents the
    instructions that are scheduled for that row.  */
@@ -148,6 +177,16 @@
   /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
   ps_insn_ptr *rows;
 
+  /* All the moves added for this partial schedule.  Index X has
+     a ps_insn id of X + g->num_nodes.  */
+  VEC (ps_reg_move_info, heap) *reg_moves;
+
+  /*  rows_length[i] holds the number of instructions in the row.
+      It is used only (as an optimization) to back off quickly from
+      trying to schedule a node in a full row; that is, to avoid running
+      through futile DFA state transitions.  */
+  int *rows_length;
+  
   /* The earliest absolute cycle of an insn in the partial schedule.  */
   int min_cycle;
 
@@ -155,29 +194,18 @@
   int max_cycle;
 
   ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
-};
 
-/* We use this to record all the register replacements we do in
-   the kernel so we can undo SMS if it is not profitable.  */
-struct undo_replace_buff_elem
-{
-  rtx insn;
-  rtx orig_reg;
-  rtx new_reg;
-  struct undo_replace_buff_elem *next;
+  int stage_count;  /* The stage count of the partial schedule.  */
 };
 
 
-
 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
 static void free_partial_schedule (partial_schedule_ptr);
 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
 void print_partial_schedule (partial_schedule_ptr, FILE *);
 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
-						ddg_node_ptr node, int cycle,
-						sbitmap must_precede,
-						sbitmap must_follow);
+						int, int, sbitmap, sbitmap);
 static void rotate_partial_schedule (partial_schedule_ptr, int);
 void set_row_column_for_ps (partial_schedule_ptr);
 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
@@ -193,34 +221,27 @@
 static void permute_partial_schedule (partial_schedule_ptr, rtx);
 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
                                     rtx, rtx);
-static void duplicate_insns_of_cycles (partial_schedule_ptr,
-				       int, int, int, rtx);
-
-#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
-#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
-#define SCHED_FIRST_REG_MOVE(x) \
-	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
-#define SCHED_NREG_MOVES(x) \
-	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
-#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
-#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
-#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+					   int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, 
+			     sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
+					  sbitmap, int *, sbitmap, sbitmap);
+static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+
+#define NODE_ASAP(node) ((node)->aux.count)
+
+#define SCHED_PARAMS(x) VEC_index (node_sched_params, node_sched_param_vec, x)
+#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
+#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
+#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
+#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
 
 /* The scheduling parameters held for each node.  */
 typedef struct node_sched_params
 {
-  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
-  int time;	/* The absolute scheduling cycle (time >= asap).  */
-
-  /* The following field (first_reg_move) is a pointer to the first
-     register-move instruction added to handle the modulo-variable-expansion
-     of the register defined by this node.  This register-move copies the
-     original register defined by the node.  */
-  rtx first_reg_move;
-
-  /* The number of register-move instructions added, immediately preceding
-     first_reg_move.  */
-  int nreg_moves;
+  int time;	/* The absolute scheduling cycle.  */
 
   int row;    /* Holds time % ii.  */
   int stage;  /* Holds time / ii.  */
@@ -230,6 +251,9 @@
   int column;
 } *node_sched_params_ptr;
 
+typedef struct node_sched_params node_sched_params;
+DEF_VEC_O (node_sched_params);
+DEF_VEC_ALLOC_O (node_sched_params, heap);
 
 /* The following three functions are copied from the current scheduler
    code in order to use sched_analyze() for computing the dependencies.
@@ -279,6 +303,49 @@
   0
 };
 
+/* Partial schedule instruction ID in PS is a register move.  Return
+   information about it.  */
+static struct ps_reg_move_info *
+ps_reg_move (partial_schedule_ptr ps, int id)
+{
+  gcc_checking_assert (id >= ps->g->num_nodes);
+  return VEC_index (ps_reg_move_info, ps->reg_moves, id - ps->g->num_nodes);
+}
+
+/* Return the rtl instruction that is being scheduled by partial schedule
+   instruction ID, which belongs to schedule PS.  */
+static rtx
+ps_rtl_insn (partial_schedule_ptr ps, int id)
+{
+  if (id < ps->g->num_nodes)
+    return ps->g->nodes[id].insn;
+  else
+    return ps_reg_move (ps, id)->insn;
+}
+
+/* Partial schedule instruction ID, which belongs to PS, occured in
+   the original (unscheduled) loop.  Return the first instruction
+   in the loop that was associated with ps_rtl_insn (PS, ID).
+   If the instruction had some notes before it, this is the first
+   of those notes.  */
+static rtx
+ps_first_note (partial_schedule_ptr ps, int id)
+{
+  gcc_assert (id < ps->g->num_nodes);
+  return ps->g->nodes[id].first_note;
+}
+
+/* Return the number of consecutive stages that are occupied by
+   partial schedule instruction ID in PS.  */
+static int
+ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
+{
+  if (id < ps->g->num_nodes)
+    return 1;
+  else
+    return ps_reg_move (ps, id)->num_consecutive_stages;
+}
+
 /* Given HEAD and TAIL which are the first and last insns in a loop;
    return the register which controls the loop.  Return zero if it has
    more than one occurrence in the loop besides the control part or the
@@ -310,10 +377,10 @@
      either a single (parallel) branch-on-count or a (non-parallel)
      branch immediately preceded by a single (decrement) insn.  */
   first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
-                             : PREV_INSN (tail));
+                             : prev_nondebug_insn (tail));
 
   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
-    if (reg_mentioned_p (reg, insn))
+    if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn))
       {
         if (dump_file)
         {
@@ -379,35 +446,59 @@
 }
 
 
-/* Points to the array that contains the sched data for each node.  */
-static node_sched_params_ptr node_sched_params;
+/* A vector that contains the sched data for each ps_insn.  */
+static VEC (node_sched_params, heap) *node_sched_param_vec;
 
-/* Allocate sched_params for each node and initialize it.  Assumes that
-   the aux field of each node contain the asap bound (computed earlier),
-   and copies it into the sched_params field.  */
+/* Allocate sched_params for each node and initialize it.  */
 static void
 set_node_sched_params (ddg_ptr g)
 {
-  int i;
+  VEC_truncate (node_sched_params, node_sched_param_vec, 0);
+  VEC_safe_grow_cleared (node_sched_params, heap,
+			 node_sched_param_vec, g->num_nodes);
+}
 
-  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
-  /* Initialize ASAP/ALAP/HIGHT to zero.  */
-  node_sched_params = (node_sched_params_ptr)
-		       xcalloc (g->num_nodes,
-				sizeof (struct node_sched_params));
+/* Make sure that node_sched_param_vec has an entry for every move in PS.  */
+static void
+extend_node_sched_params (partial_schedule_ptr ps)
+{
+  VEC_safe_grow_cleared (node_sched_params, heap, node_sched_param_vec,
+			 ps->g->num_nodes + VEC_length (ps_reg_move_info,
+							ps->reg_moves));
+}
 
-  /* Set the pointer of the general data of the node to point to the
-     appropriate sched_params structure.  */
-  for (i = 0; i < g->num_nodes; i++)
+/* Update the sched_params (time, row and stage) for node U using the II,
+   the CYCLE of U and MIN_CYCLE.
+   We're not simply taking the following
+   SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
+   because the stages may not be aligned on cycle 0.  */
+static void
+update_node_sched_params (int u, int ii, int cycle, int min_cycle)
+{
+  int sc_until_cycle_zero;
+  int stage;
+
+  SCHED_TIME (u) = cycle;
+  SCHED_ROW (u) = SMODULO (cycle, ii);
+
+  /* The calculation of stage count is done adding the number
+     of stages before cycle zero and after cycle zero.  */
+  sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+  if (SCHED_TIME (u) < 0)
+    {
+      stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+      SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+    }
+  else
     {
-      /* Watch out for aliasing problems?  */
-      node_sched_params[i].asap = g->nodes[i].aux.count;
-      g->nodes[i].aux.info = &node_sched_params[i];
+      stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+      SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
     }
 }
 
 static void
-print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
+print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
 {
   int i;
 
@@ -415,22 +506,170 @@
     return;
   for (i = 0; i < num_nodes; i++)
     {
-      node_sched_params_ptr nsp = &node_sched_params[i];
-      rtx reg_move = nsp->first_reg_move;
-      int j;
+      node_sched_params_ptr nsp = SCHED_PARAMS (i);
 
       fprintf (file, "Node = %d; INSN = %d\n", i,
-	       (INSN_UID (g->nodes[i].insn)));
-      fprintf (file, " asap = %d:\n", nsp->asap);
+	       INSN_UID (ps_rtl_insn (ps, i)));
+      fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
       fprintf (file, " time = %d:\n", nsp->time);
-      fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
-      for (j = 0; j < nsp->nreg_moves; j++)
+      fprintf (file, " stage = %d:\n", nsp->stage);
+    }
+}
+
+/* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
+static void
+set_columns_for_row (partial_schedule_ptr ps, int row)
+{
+  ps_insn_ptr cur_insn;
+  int column;
+
+  column = 0;
+  for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
+    SCHED_COLUMN (cur_insn->id) = column++;
+}
+
+/* Set SCHED_COLUMN for each instruction in PS.  */
+static void
+set_columns_for_ps (partial_schedule_ptr ps)
+{
+  int row;
+
+  for (row = 0; row < ps->ii; row++)
+    set_columns_for_row (ps, row);
+}
+
+/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
+   Its single predecessor has already been scheduled, as has its
+   ddg node successors.  (The move may have also another move as its
+   successor, in which case that successor will be scheduled later.)
+
+   The move is part of a chain that satisfies register dependencies
+   between a producing ddg node and various consuming ddg nodes.
+   If some of these dependencies have a distance of 1 (meaning that
+   the use is upward-exposoed) then DISTANCE1_USES is nonnull and
+   contains the set of uses with distance-1 dependencies.
+   DISTANCE1_USES is null otherwise.
+
+   MUST_FOLLOW is a scratch bitmap that is big enough to hold
+   all current ps_insn ids.
+
+   Return true on success.  */
+static bool
+schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
+		   sbitmap distance1_uses, sbitmap must_follow)
+{
+  unsigned int u;
+  int this_time, this_distance, this_start, this_end, this_latency;
+  int start, end, c, ii;
+  sbitmap_iterator sbi;
+  ps_reg_move_info *move;
+  rtx this_insn;
+  ps_insn_ptr psi;
+
+  move = ps_reg_move (ps, i_reg_move);
+  ii = ps->ii;
+  if (dump_file)
+    {
+      fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
+	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
+	       PS_MIN_CYCLE (ps));
+      print_rtl_single (dump_file, move->insn);
+      fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
+      fprintf (dump_file, "=========== =========== =====\n");
+    }
+
+  start = INT_MIN;
+  end = INT_MAX;
+
+  /* For dependencies of distance 1 between a producer ddg node A
+     and consumer ddg node B, we have a chain of dependencies:
+
+        A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
+
+     where Mi is the ith move.  For dependencies of distance 0 between
+     a producer ddg node A and consumer ddg node C, we have a chain of
+     dependencies:
+
+        A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
+
+     where Mi' occupies the same position as Mi but occurs a stage later.
+     We can only schedule each move once, so if we have both types of
+     chain, we model the second as:
+
+        A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
+
+     First handle the dependencies between the previously-scheduled
+     predecessor and the move.  */
+  this_insn = ps_rtl_insn (ps, move->def);
+  this_latency = insn_latency (this_insn, move->insn);
+  this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
+  this_time = SCHED_TIME (move->def) - this_distance * ii;
+  this_start = this_time + this_latency;
+  this_end = this_time + ii;
+  if (dump_file)
+    fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
+	     this_start, this_end, SCHED_TIME (move->def),
+	     INSN_UID (this_insn), this_latency, this_distance,
+	     INSN_UID (move->insn));
+
+  if (start < this_start)
+    start = this_start;
+  if (end > this_end)
+    end = this_end;
+
+  /* Handle the dependencies between the move and previously-scheduled
+     successors.  */
+  EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, u, sbi)
+    {
+      this_insn = ps_rtl_insn (ps, u);
+      this_latency = insn_latency (move->insn, this_insn);
+      if (distance1_uses && !TEST_BIT (distance1_uses, u))
+	this_distance = -1;
+      else
+	this_distance = 0;
+      this_time = SCHED_TIME (u) + this_distance * ii;
+      this_start = this_time - ii;
+      this_end = this_time - this_latency;
+      if (dump_file)
+	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
+		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
+		 this_latency, this_distance, INSN_UID (this_insn));
+
+      if (start < this_start)
+	start = this_start;
+      if (end > this_end)
+	end = this_end;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "----------- ----------- -----\n");
+      fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
+    }
+
+  sbitmap_zero (must_follow);
+  SET_BIT (must_follow, move->def);
+
+  start = MAX (start, end - (ii - 1));
+  for (c = end; c >= start; c--)
+    {
+      psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
+					 move->uses, must_follow);
+      if (psi)
 	{
-	  fprintf (file, " reg_move = ");
-	  print_rtl_single (file, reg_move);
-	  reg_move = PREV_INSN (reg_move);
+	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
+	  if (dump_file)
+	    fprintf (dump_file, "\nScheduled register move INSN %d at"
+		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
+		     SCHED_ROW (i_reg_move));
+	  return true;
 	}
     }
+
+  if (dump_file)
+    fprintf (dump_file, "\nNo available slot\n\n");
+
+  return false;
 }
 
 /*
@@ -444,175 +683,201 @@
    nreg_moves = ----------------------------------- + 1 - {   dependence.
                             ii                          { 1 if not.
 */
-static struct undo_replace_buff_elem *
-generate_reg_moves (partial_schedule_ptr ps, bool rescan)
+static bool
+schedule_reg_moves (partial_schedule_ptr ps)
 {
   ddg_ptr g = ps->g;
   int ii = ps->ii;
   int i;
-  struct undo_replace_buff_elem *reg_move_replaces = NULL;
 
   for (i = 0; i < g->num_nodes; i++)
     {
       ddg_node_ptr u = &g->nodes[i];
       ddg_edge_ptr e;
       int nreg_moves = 0, i_reg_move;
-      sbitmap *uses_of_defs;
-      rtx last_reg_move;
       rtx prev_reg, old_reg;
-
+      int first_move;
+      int distances[2];
+      sbitmap must_follow;
+      sbitmap distance1_uses;
+      rtx set = single_set (u->insn);
+      
+      /* Skip instructions that do not set a register.  */
+      if ((set && !REG_P (SET_DEST (set))))
+        continue;
+ 
       /* Compute the number of reg_moves needed for u, by looking at life
 	 ranges started at u (excluding self-loops).  */
+      distances[0] = distances[1] = false;
       for (e = u->out; e; e = e->next_out)
 	if (e->type == TRUE_DEP && e->dest != e->src)
 	  {
-	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
+	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
+				- SCHED_TIME (e->src->cuid)) / ii;
 
             if (e->distance == 1)
-              nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
+              nreg_moves4e = (SCHED_TIME (e->dest->cuid)
+			      - SCHED_TIME (e->src->cuid) + ii) / ii;
 
 	    /* If dest precedes src in the schedule of the kernel, then dest
 	       will read before src writes and we can save one reg_copy.  */
-	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
-		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
+	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
+		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
 	      nreg_moves4e--;
 
+            if (nreg_moves4e >= 1)
+	      {
+		/* !single_set instructions are not supported yet and
+		   thus we do not except to encounter them in the loop
+		   except from the doloop part.  For the latter case
+		   we assume no regmoves are generated as the doloop
+		   instructions are tied to the branch with an edge.  */
+		gcc_assert (set);
+		/* If the instruction contains auto-inc register then
+		   validate that the regmov is being generated for the
+		   target regsiter rather then the inc'ed register.	*/
+		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
+	      }
+	    
+	    if (nreg_moves4e)
+	      {
+		gcc_assert (e->distance < 2);
+		distances[e->distance] = true;
+	      }
 	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
 	  }
 
       if (nreg_moves == 0)
 	continue;
 
+      /* Create NREG_MOVES register moves.  */
+      first_move = VEC_length (ps_reg_move_info, ps->reg_moves);
+      VEC_safe_grow_cleared (ps_reg_move_info, heap, ps->reg_moves,
+			     first_move + nreg_moves);
+      extend_node_sched_params (ps);
+
+      /* Record the moves associated with this node.  */
+      first_move += ps->g->num_nodes;
+
+      /* Generate each move.  */
+      old_reg = prev_reg = SET_DEST (single_set (u->insn));
+      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
+	{
+	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
+
+	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
+	  move->uses = sbitmap_alloc (first_move + nreg_moves);
+	  move->old_reg = old_reg;
+	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
+	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
+	  move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg));
+	  sbitmap_zero (move->uses);
+
+	  prev_reg = move->new_reg;
+	}
+
+      distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
+
       /* Every use of the register defined by node may require a different
 	 copy of this register, depending on the time the use is scheduled.
-	 Set a bitmap vector, telling which nodes use each copy of this
-	 register.  */
-      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
-      sbitmap_vector_zero (uses_of_defs, nreg_moves);
+	 Record which uses require which move results.  */
       for (e = u->out; e; e = e->next_out)
 	if (e->type == TRUE_DEP && e->dest != e->src)
 	  {
-	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
+	    int dest_copy = (SCHED_TIME (e->dest->cuid)
+			     - SCHED_TIME (e->src->cuid)) / ii;
 
 	    if (e->distance == 1)
-	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
+	      dest_copy = (SCHED_TIME (e->dest->cuid)
+			   - SCHED_TIME (e->src->cuid) + ii) / ii;
 
-	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
-		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
+	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
+		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
 	      dest_copy--;
 
 	    if (dest_copy)
-	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
-	  }
+	      {
+		ps_reg_move_info *move;
 
-      /* Now generate the reg_moves, attaching relevant uses to them.  */
-      SCHED_NREG_MOVES (u) = nreg_moves;
-      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
-      /* Insert the reg-moves right before the notes which precede
-         the insn they relates to.  */
-      last_reg_move = u->first_note;
+		move = ps_reg_move (ps, first_move + dest_copy - 1);
+		SET_BIT (move->uses, e->dest->cuid);
+		if (e->distance == 1)
+		  SET_BIT (distance1_uses, e->dest->cuid);
+	      }
+	  }
 
+      must_follow = sbitmap_alloc (first_move + nreg_moves);
       for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
-	{
-	  unsigned int i_use = 0;
-	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
-	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
-	  sbitmap_iterator sbi;
-
-	  add_insn_before (reg_move, last_reg_move, NULL);
-	  last_reg_move = reg_move;
-
-	  if (!SCHED_FIRST_REG_MOVE (u))
-	    SCHED_FIRST_REG_MOVE (u) = reg_move;
-
-	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
-	    {
-	      struct undo_replace_buff_elem *rep;
-
-	      rep = (struct undo_replace_buff_elem *)
-		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
-	      rep->insn = g->nodes[i_use].insn;
-	      rep->orig_reg = old_reg;
-	      rep->new_reg = new_reg;
-
-	      if (! reg_move_replaces)
-		reg_move_replaces = rep;
-	      else
-		{
-		  rep->next = reg_move_replaces;
-		  reg_move_replaces = rep;
-		}
-
-	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
-	      if (rescan)
-		df_insn_rescan (g->nodes[i_use].insn);
-	    }
-
-	  prev_reg = new_reg;
-	}
-      sbitmap_vector_free (uses_of_defs);
+	if (!schedule_reg_move (ps, first_move + i_reg_move,
+				distance1_uses, must_follow))
+	  break;
+      sbitmap_free (must_follow);
+      if (distance1_uses)
+	sbitmap_free (distance1_uses);
+      if (i_reg_move < nreg_moves)
+	return false;
     }
-  return reg_move_replaces;
+  return true;
 }
 
-/* Free memory allocated for the undo buffer.  */
+/* Emit the moves associatied with PS.  Apply the substitutions
+   associated with them.  */
 static void
-free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
+apply_reg_moves (partial_schedule_ptr ps)
 {
+  ps_reg_move_info *move;
+  int i;
 
-  while (reg_move_replaces)
+  FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
     {
-      struct undo_replace_buff_elem *rep = reg_move_replaces;
+      unsigned int i_use;
+      sbitmap_iterator sbi;
 
-      reg_move_replaces = reg_move_replaces->next;
-      free (rep);
+      EXECUTE_IF_SET_IN_SBITMAP (move->uses, 0, i_use, sbi)
+	{
+	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
+	  df_insn_rescan (ps->g->nodes[i_use].insn);
+	}
     }
 }
 
-/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
-   of SCHED_ROW and SCHED_STAGE.  */
+/* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
+   SCHED_ROW and SCHED_STAGE.  */
 static void
-normalize_sched_times (partial_schedule_ptr ps)
+reset_sched_times (partial_schedule_ptr ps, int amount)
 {
   int row;
-  int amount = PS_MIN_CYCLE (ps);
   int ii = ps->ii;
   ps_insn_ptr crr_insn;
 
   for (row = 0; row < ii; row++)
     for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
       {
-	ddg_node_ptr u = crr_insn->node;
+	int u = crr_insn->id;
 	int normalized_time = SCHED_TIME (u) - amount;
+	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
 
-	if (dump_file)
-	  fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
-		   min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
-		   (u), ps->min_cycle);
+        if (dump_file)
+          {
+            /* Print the scheduling times after the rotation.  */
+	    rtx insn = ps_rtl_insn (ps, u);
+
+            fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
+                     "crr_insn->cycle=%d, min_cycle=%d", u,
+                     INSN_UID (insn), normalized_time, new_min_cycle);
+            if (JUMP_P (insn))
+              fprintf (dump_file, " (branch)");
+            fprintf (dump_file, "\n");
+          }
+	
 	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
 	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
-	SCHED_TIME (u) = normalized_time;
-	SCHED_ROW (u) = normalized_time % ii;
-	SCHED_STAGE (u) = normalized_time / ii;
-      }
-}
-
-/* Set SCHED_COLUMN of each node according to its position in PS.  */
-static void
-set_columns_for_ps (partial_schedule_ptr ps)
-{
-  int row;
-
-  for (row = 0; row < ps->ii; row++)
-    {
-      ps_insn_ptr cur_insn = ps->rows[row];
-      int column = 0;
 
-      for (; cur_insn; cur_insn = cur_insn->next_in_row)
-	SCHED_COLUMN (cur_insn->node) = column++;
-    }
+	crr_insn->cycle = normalized_time;
+	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
+      }
 }
-
+ 
 /* Permute the insns according to their order in PS, from row 0 to
    row ii-1, and position them right before LAST.  This schedules
    the insns of the loop kernel.  */
@@ -625,14 +890,220 @@
 
   for (row = 0; row < ii ; row++)
     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
-      if (PREV_INSN (last) != ps_ij->node->insn)
-      	reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
-			    PREV_INSN (last));
+      {
+	rtx insn = ps_rtl_insn (ps, ps_ij->id);
+
+	if (PREV_INSN (last) != insn)
+	  {
+	    if (ps_ij->id < ps->g->num_nodes)
+	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
+				  PREV_INSN (last));
+	    else
+	      add_insn_before (insn, last, NULL);
+	  }
+      }
+}
+
+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
+   respectively only if cycle C falls on the border of the scheduling
+   window boundaries marked by START and END cycles.  STEP is the
+   direction of the window.  */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+			 sbitmap *tmp_precede, sbitmap must_precede, int c,
+			 int start, int end, int step)
+{
+  *tmp_precede = NULL;
+  *tmp_follow = NULL;
+
+  if (c == start)
+    {
+      if (step == 1)
+	*tmp_precede = must_precede;
+      else			/* step == -1.  */
+	*tmp_follow = must_follow;
+    }
+  if (c == end - step)
+    {
+      if (step == 1)
+	*tmp_follow = must_follow;
+      else			/* step == -1.  */
+	*tmp_precede = must_precede;
+    }
+
+}
+
+/* Return True if the branch can be moved to row ii-1 while
+   normalizing the partial schedule PS to start from cycle zero and thus
+   optimize the SC.  Otherwise return False.  */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+  int amount = PS_MIN_CYCLE (ps);
+  sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+  int start, end, step;
+  int ii = ps->ii;
+  bool ok = false;
+  int stage_count, stage_count_curr;
+
+  /* Compare the SC after normalization and SC after bringing the branch
+     to row ii-1.  If they are equal just bail out.  */
+  stage_count = calculate_stage_count (ps, amount);
+  stage_count_curr =
+    calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
+
+  if (stage_count == stage_count_curr)
+    {
+      if (dump_file)
+	fprintf (dump_file, "SMS SC already optimized.\n");
+
+      ok = false;
+      goto clear;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "SMS Trying to optimize branch location\n");
+      fprintf (dump_file, "SMS partial schedule before trial:\n");
+      print_partial_schedule (ps, dump_file);
+    }
+
+  /* First, normalize the partial scheduling.  */
+  reset_sched_times (ps, amount);
+  rotate_partial_schedule (ps, amount);
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+	       ii, stage_count);
+      print_partial_schedule (ps, dump_file);
+    }
+
+  if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
+    {
+      ok = true;
+      goto clear;
+    }
+
+  sbitmap_ones (sched_nodes);
+
+  /* Calculate the new placement of the branch.  It should be in row
+     ii-1 and fall into it's scheduling window.  */
+  if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+			&step, &end) == 0)
+    {
+      bool success;
+      ps_insn_ptr next_ps_i;
+      int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
+      int row = SMODULO (branch_cycle, ps->ii);
+      int num_splits = 0;
+      sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+      int c;
+
+      if (dump_file)
+	fprintf (dump_file, "\nTrying to schedule node %d "
+		 "INSN = %d  in (%d .. %d) step %d\n",
+		 g->closing_branch->cuid,
+		 (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+      gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+      if (step == 1)
+	{
+	  c = start + ii - SMODULO (start, ii) - 1;
+	  gcc_assert (c >= start);
+	  if (c >= end)
+	    {
+	      ok = false;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "SMS failed to schedule branch at cycle: %d\n", c);
+	      goto clear;
+	    }
+	}
+      else
+	{
+	  c = start - SMODULO (start, ii) - 1;
+	  gcc_assert (c <= start);
+
+	  if (c <= end)
+	    {
+	      if (dump_file)
+		fprintf (dump_file,
+			 "SMS failed to schedule branch at cycle: %d\n", c);
+	      ok = false;
+	      goto clear;
+	    }
+	}
+
+      must_precede = sbitmap_alloc (g->num_nodes);
+      must_follow = sbitmap_alloc (g->num_nodes);
+
+      /* Try to schedule the branch is it's new cycle.  */
+      calculate_must_precede_follow (g->closing_branch, start, end,
+				     step, ii, sched_nodes,
+				     must_precede, must_follow);
+
+      set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+			       must_precede, c, start, end, step);
+
+      /* Find the element in the partial schedule related to the closing
+         branch so we can remove it from it's current cycle.  */
+      for (next_ps_i = ps->rows[row];
+	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
+	if (next_ps_i->id == g->closing_branch->cuid)
+	  break;
+
+      remove_node_from_ps (ps, next_ps_i);
+      success =
+	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
+				      sched_nodes, &num_splits,
+				      tmp_precede, tmp_follow);
+      gcc_assert (num_splits == 0);
+      if (!success)
+	{
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "SMS failed to schedule branch at cycle: %d, "
+		     "bringing it back to cycle %d\n", c, branch_cycle);
+
+	  /* The branch was failed to be placed in row ii - 1.
+	     Put it back in it's original place in the partial
+	     schedualing.  */
+	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+				   must_precede, branch_cycle, start, end,
+				   step);
+	  success =
+	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
+					  branch_cycle, sched_nodes,
+					  &num_splits, tmp_precede,
+					  tmp_follow);
+	  gcc_assert (success && (num_splits == 0));
+	  ok = false;
+	}
+      else
+	{
+	  /* The branch is placed in row ii - 1.  */
+	  if (dump_file)
+	    fprintf (dump_file,
+		     "SMS success in moving branch to cycle %d\n", c);
+
+	  update_node_sched_params (g->closing_branch->cuid, ii, c,
+				    PS_MIN_CYCLE (ps));
+	  ok = true;
+	}
+
+      free (must_precede);
+      free (must_follow);
+    }
+
+clear:
+  free (sched_nodes);
+  return ok;
 }
 
 static void
 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
-			   int to_stage, int for_prolog, rtx count_reg)
+			   int to_stage, rtx count_reg)
 {
   int row;
   ps_insn_ptr ps_ij;
@@ -640,59 +1111,30 @@
   for (row = 0; row < ps->ii; row++)
     for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
       {
-	ddg_node_ptr u_node = ps_ij->node;
-	int j, i_reg_moves;
-	rtx reg_move = NULL_RTX;
+	int u = ps_ij->id;
+	int first_u, last_u;
+	rtx u_insn;
 
         /* Do not duplicate any insn which refers to count_reg as it
            belongs to the control part.
+           The closing branch is scheduled as well and thus should
+           be ignored.
            TODO: This should be done by analyzing the control part of
            the loop.  */
-        if (reg_mentioned_p (count_reg, u_node->insn))
+	u_insn = ps_rtl_insn (ps, u);
+        if (reg_mentioned_p (count_reg, u_insn)
+            || JUMP_P (u_insn))
           continue;
 
-	if (for_prolog)
-	  {
-	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
-	       number of reg_moves starting with the second occurrence of
-	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
-	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
-	    i_reg_moves = MAX (i_reg_moves, 0);
-	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
-
-	    /* The reg_moves start from the *first* reg_move backwards.  */
-	    if (i_reg_moves)
-	      {
-		reg_move = SCHED_FIRST_REG_MOVE (u_node);
-		for (j = 1; j < i_reg_moves; j++)
-		  reg_move = PREV_INSN (reg_move);
-	      }
-	  }
-	else /* It's for the epilog.  */
+	first_u = SCHED_STAGE (u);
+	last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
+	if (from_stage <= last_u && to_stage >= first_u)
 	  {
-	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
-	       starting to decrease one stage after u_node no longer occurs;
-	       that is, generate all reg_moves until
-	       SCHED_STAGE (u_node) == from_stage - 1.  */
-	    i_reg_moves = SCHED_NREG_MOVES (u_node)
-	    	       - (from_stage - SCHED_STAGE (u_node) - 1);
-	    i_reg_moves = MAX (i_reg_moves, 0);
-	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
-
-	    /* The reg_moves start from the *last* reg_move forwards.  */
-	    if (i_reg_moves)
-	      {
-		reg_move = SCHED_FIRST_REG_MOVE (u_node);
-		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
-		  reg_move = PREV_INSN (reg_move);
-	      }
+	    if (u < ps->g->num_nodes)
+	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
+	    else
+	      emit_insn (copy_rtx (PATTERN (u_insn)));
 	  }
-
-	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
-	  emit_insn (copy_rtx (PATTERN (reg_move)));
-	if (SCHED_STAGE (u_node) >= from_stage
-	    && SCHED_STAGE (u_node) <= to_stage)
-	  duplicate_insn_chain (u_node->first_note, u_node->insn);
       }
 }
 
@@ -726,11 +1168,13 @@
     }
 
   for (i = 0; i < last_stage; i++)
-    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
+    duplicate_insns_of_cycles (ps, 0, i, count_reg);
 
   /* Put the prolog on the entry edge.  */
   e = loop_preheader_edge (loop);
   split_edge_and_insert (e, get_insns ());
+  if (!flag_resched_modulo_sched)
+    e->dest->flags |= BB_DISABLE_SCHEDULE;
 
   end_sequence ();
 
@@ -738,15 +1182,32 @@
   start_sequence ();
 
   for (i = 0; i < last_stage; i++)
-    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
+    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
 
   /* Put the epilogue on the exit edge.  */
   gcc_assert (single_exit (loop));
   e = single_exit (loop);
   split_edge_and_insert (e, get_insns ());
+  if (!flag_resched_modulo_sched)
+    e->dest->flags |= BB_DISABLE_SCHEDULE;
+
   end_sequence ();
 }
 
+/* Mark LOOP as software pipelined so the later
+   scheduling passes don't touch it.  */
+static void
+mark_loop_unsched (struct loop *loop)
+{
+  unsigned i;
+  basic_block *bbs = get_loop_body (loop);
+
+  for (i = 0; i < loop->num_nodes; i++)
+    bbs[i]->flags |= BB_DISABLE_SCHEDULE;
+
+  free (bbs);
+}
+
 /* Return true if all the BBs of the loop are empty except the
    loop header.  */
 static bool
@@ -1009,10 +1470,10 @@
 	continue;
       }
 
-      /* Don't handle BBs with calls or barriers, or !single_set insns,
-         or auto-increment insns (to avoid creating invalid reg-moves
-         for the auto-increment insns).
-         ??? Should handle auto-increment insns.
+      /* Don't handle BBs with calls or barriers
+	 or !single_set with the exception of instructions that include
+	 count_reg---these instructions are part of the control part
+	 that do-loop recognizes.
          ??? Should handle insns defining subregs.  */
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
       {
@@ -1021,8 +1482,8 @@
         if (CALL_P (insn)
             || BARRIER_P (insn)
             || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
-                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
-            || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
+                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
+                && !reg_mentioned_p (count_reg, insn))
             || (INSN_P (insn) && (set = single_set (insn))
                 && GET_CODE (SET_DEST (set)) == SUBREG))
         break;
@@ -1036,8 +1497,6 @@
 		fprintf (dump_file, "SMS loop-with-call\n");
 	      else if (BARRIER_P (insn))
 		fprintf (dump_file, "SMS loop-with-barrier\n");
-              else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
-                fprintf (dump_file, "SMS reg inc\n");
               else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
                 fprintf (dump_file, "SMS loop-with-not-single-set\n");
@@ -1049,7 +1508,11 @@
 	  continue;
 	}
 
-      if (! (g = create_ddg (bb, 0)))
+      /* Always schedule the closing branch with the rest of the
+         instructions. The branch is rotated to be in row ii-1 at the
+         end of the scheduling procedure to make sure it's the last
+         instruction in the iteration.  */
+      if (! (g = create_ddg (bb, 1)))
         {
           if (dump_file)
 	    fprintf (dump_file, "SMS create_ddg failed\n");
@@ -1072,9 +1535,9 @@
     {
       rtx head, tail;
       rtx count_reg, count_init;
-      int mii, rec_mii;
-      unsigned stage_count = 0;
+      int mii, rec_mii, stage_count, min_cycle;
       HOST_WIDEST_INT loop_count = 0;
+      bool opt_sc_p;
 
       if (! (g = g_arr[loop->num]))
         continue;
@@ -1151,63 +1614,103 @@
 	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
 		 rec_mii, mii, maxii);
 
-      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
-	 ASAP.  */
-      set_node_sched_params (g);
-
-      ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
-      if (ps){
-	stage_count = PS_STAGE_COUNT (ps);
-        gcc_assert(stage_count >= 1);
-      }
-
-      /* Stage count of 1 means that there is no interleaving between
-         iterations, let the scheduling passes do the job.  */
-      if (stage_count <= 1
-	  || (count_init && (loop_count <= stage_count))
-	  || (flag_branch_probabilities && (trip_count <= stage_count)))
+      for (;;)
 	{
-	  if (dump_file)
+	  set_node_sched_params (g);
+
+	  stage_count = 0;
+	  opt_sc_p = false;
+	  ps = sms_schedule_by_order (g, mii, maxii, node_order);
+      
+	  if (ps)
 	    {
-	      fprintf (dump_file, "SMS failed... \n");
-	      fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
-	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
-	      fprintf (dump_file, ", trip-count=");
-	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
-	      fprintf (dump_file, ")\n");
-	    }
-	  continue;
-	}
-      else
-	{
-	  struct undo_replace_buff_elem *reg_move_replaces;
+	      /* Try to achieve optimized SC by normalizing the partial
+		 schedule (having the cycles start from cycle zero).
+		 The branch location must be placed in row ii-1 in the
+		 final scheduling.	If failed, shift all instructions to
+		 position the branch in row ii-1.  */
+	      opt_sc_p = optimize_sc (ps, g);
+	      if (opt_sc_p)
+		stage_count = calculate_stage_count (ps, 0);
+	      else
+		{
+		  /* Bring the branch to cycle ii-1.  */
+		  int amount = (SCHED_TIME (g->closing_branch->cuid)
+				- (ps->ii - 1));
+	      
+		  if (dump_file)
+		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
 
-	  if (dump_file)
+		  stage_count = calculate_stage_count (ps, amount);
+		}
+	  
+	      gcc_assert (stage_count >= 1);
+	    }
+      
+	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
+	     1 means that there is no interleaving between iterations thus
+	     we let the scheduling passes do the job in this case.  */
+	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
+	      || (count_init && (loop_count <= stage_count))
+	      || (flag_branch_probabilities && (trip_count <= stage_count)))
 	    {
-	      fprintf (dump_file,
-		       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
-		       stage_count);
-	      print_partial_schedule (ps, dump_file);
-	      fprintf (dump_file,
-		       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
-		       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
+	      if (dump_file)
+		{
+		  fprintf (dump_file, "SMS failed... \n");
+		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
+			   " loop-count=", stage_count);
+		  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
+		  fprintf (dump_file, ", trip-count=");
+		  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
+		  fprintf (dump_file, ")\n");
+		}
+	      break;
 	    }
 
-	  /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
-	     the closing_branch was scheduled and should appear in the last (ii-1)
-	     row.  Otherwise, we are free to schedule the branch, and we let nodes
-	     that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
-	     row; this should reduce stage_count to minimum.
-             TODO: Revisit the issue of scheduling the insns of the
-             control part relative to the branch when the control part
-             has more than one insn.  */
-	  normalize_sched_times (ps);
-	  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
+          if (!opt_sc_p)
+            {
+	      /* Rotate the partial schedule to have the branch in row ii-1.  */
+              int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
+	      
+              reset_sched_times (ps, amount);
+              rotate_partial_schedule (ps, amount);
+            }
+	  
 	  set_columns_for_ps (ps);
 
+	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
+	  if (!schedule_reg_moves (ps))
+	    {
+	      mii = ps->ii + 1;
+	      free_partial_schedule (ps);
+	      continue;
+	    }
+
+	  /* Moves that handle incoming values might have been added
+	     to a new first stage.  Bump the stage count if so.
+
+	     ??? Perhaps we could consider rotating the schedule here
+	     instead?  */
+	  if (PS_MIN_CYCLE (ps) < min_cycle)
+	    {
+	      reset_sched_times (ps, 0);
+	      stage_count++;
+	    }
+
+	  /* The stage count should now be correct without rotation.  */
+	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
+	  PS_STAGE_COUNT (ps) = stage_count;
+
 	  canon_loop (loop);
 
+          if (dump_file)
+            {
+	      fprintf (dump_file,
+		       "%s:%d SMS succeeded %d %d (with ii, sc)\n",
+		       insn_file (tail), insn_line (tail), ps->ii, stage_count);
+	      print_partial_schedule (ps, dump_file);
+	    }
+ 
           /* case the BCT count is not known , Do loop-versioning */
 	  if (count_reg && ! count_init)
             {
@@ -1230,23 +1733,23 @@
 	  permute_partial_schedule (ps, g->closing_branch->first_note);
 
           /* Mark this loop as software pipelined so the later
-	     scheduling passes doesn't touch it.  */
+	     scheduling passes don't touch it.  */
 	  if (! flag_resched_modulo_sched)
-	    g->bb->flags |= BB_DISABLE_SCHEDULE;
+	    mark_loop_unsched (loop);
+	  
 	  /* The life-info is not valid any more.  */
 	  df_set_bb_dirty (g->bb);
 
-	  reg_move_replaces = generate_reg_moves (ps, true);
+	  apply_reg_moves (ps);
 	  if (dump_file)
-	    print_node_sched_params (dump_file, g->num_nodes, g);
+	    print_node_sched_params (dump_file, g->num_nodes, ps);
 	  /* Generate prolog and epilog.  */
           generate_prolog_epilog (ps, loop, count_reg, count_init);
-
-	  free_undo_replace_buff (reg_move_replaces);
+	  break;
 	}
 
       free_partial_schedule (ps);
-      free (node_sched_params);
+      VEC_free (node_sched_params, heap, node_sched_param_vec);
       free (node_order);
       free_ddg (g);
     }
@@ -1347,19 +1850,21 @@
    scheduling window is empty and zero otherwise.  */
 
 static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
-		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
+		  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
+		  int *end_p)
 {
   int start, step, end;
+  int early_start, late_start;
   ddg_edge_ptr e;
-  int u = nodes_order [i];
-  ddg_node_ptr u_node = &ps->g->nodes[u];
   sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
   sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
   sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
   sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
   int psp_not_empty;
   int pss_not_empty;
+  int count_preds;
+  int count_succs;
 
   /* 1. compute sched window for u (start, end, step).  */
   sbitmap_zero (psp);
@@ -1367,214 +1872,119 @@
   psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
   pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);
 
-  if (psp_not_empty && !pss_not_empty)
-    {
-      int early_start = INT_MIN;
-
-      end = INT_MAX;
-      for (e = u_node->in; e != 0; e = e->next_in)
-	{
-	  ddg_node_ptr v_node = e->src;
-
-          if (dump_file)
-            {
-	      fprintf (dump_file, "\nProcessing edge: ");
-              print_ddg_edge (dump_file, e);
-	      fprintf (dump_file,
-		       "\nScheduling %d (%d) in psp_not_empty,"
-		       " checking p %d (%d): ", u_node->cuid,
-		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
-		       (v_node->insn));
-            }
-
-	  if (TEST_BIT (sched_nodes, v_node->cuid))
-	    {
-              int p_st = SCHED_TIME (v_node);
-
-              early_start =
-                MAX (early_start, p_st + e->latency - (e->distance * ii));
-
-              if (dump_file)
-                fprintf (dump_file,
-                         "pred st = %d; early_start = %d; latency: %d",
-                         p_st, early_start, e->latency);
+  /* We first compute a forward range (start <= end), then decide whether
+     to reverse it.  */
+  early_start = INT_MIN;
+  late_start = INT_MAX;
+  start = INT_MIN;
+  end = INT_MAX;
+  step = 1;
+
+  count_preds = 0;
+  count_succs = 0;
+
+  if (dump_file && (psp_not_empty || pss_not_empty))
+    {
+      fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
+	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
+      fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
+	       "start", "early start", "late start", "end", "time");
+      fprintf (dump_file, "=========== =========== =========== ==========="
+	       " =====\n");
+    }
+  /* Calculate early_start and limit end.  Both bounds are inclusive.  */
+  if (psp_not_empty)
+    for (e = u_node->in; e != 0; e = e->next_in)
+      {
+	int v = e->src->cuid;
 
-	      if (e->data_type == MEM_DEP)
-		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
-	    }
-         else if (dump_file)
-            fprintf (dump_file, "the node is not scheduled\n");
-	}
-      start = early_start;
-      end = MIN (end, early_start + ii);
-      /* Schedule the node close to it's predecessors.  */
-      step = 1;
+	if (TEST_BIT (sched_nodes, v))
+	  {
+	    int p_st = SCHED_TIME (v);
+	    int earliest = p_st + e->latency - (e->distance * ii);
+	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
 
-      if (dump_file)
-        fprintf (dump_file,
-		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
-		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
-    }
+	    if (dump_file)
+	      {
+		fprintf (dump_file, "%11s %11d %11s %11d %5d",
+			 "", earliest, "", latest, p_st);
+		print_ddg_edge (dump_file, e);
+		fprintf (dump_file, "\n");
+	      }
 
-  else if (!psp_not_empty && pss_not_empty)
-    {
-      int late_start = INT_MAX;
+	    early_start = MAX (early_start, earliest);
+	    end = MIN (end, latest);
 
-      end = INT_MIN;
-      for (e = u_node->out; e != 0; e = e->next_out)
-	{
-	  ddg_node_ptr v_node = e->dest;
+	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+	      count_preds++;
+	  }
+      }
 
-          if (dump_file)
-            {
-              fprintf (dump_file, "\nProcessing edge:");
-              print_ddg_edge (dump_file, e);
-              fprintf (dump_file,
-                       "\nScheduling %d (%d) in pss_not_empty,"
-                       " checking s %d (%d): ", u_node->cuid,
-                       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
-                       (v_node->insn));
-            }
+  /* Calculate late_start and limit start.  Both bounds are inclusive.  */
+  if (pss_not_empty)
+    for (e = u_node->out; e != 0; e = e->next_out)
+      {
+	int v = e->dest->cuid;
 
-	  if (TEST_BIT (sched_nodes, v_node->cuid))
-	    {
-              int s_st = SCHED_TIME (v_node);
+	if (TEST_BIT (sched_nodes, v))
+	  {
+	    int s_st = SCHED_TIME (v);
+	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
+	    int latest = s_st - e->latency + (e->distance * ii);
 
-              late_start = MIN (late_start,
-                                s_st - e->latency + (e->distance * ii));
+	    if (dump_file)
+	      {
+		fprintf (dump_file, "%11d %11s %11d %11s %5d",
+			 earliest, "", latest, "", s_st);
+		print_ddg_edge (dump_file, e);
+		fprintf (dump_file, "\n");
+	      }
 
-              if (dump_file)
-                fprintf (dump_file,
-                         "succ st = %d; late_start = %d; latency = %d",
-                         s_st, late_start, e->latency);
-
-	      if (e->data_type == MEM_DEP)
-		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
-             if (dump_file)
-                 fprintf (dump_file, "end = %d\n", end);
+	    start = MAX (start, earliest);
+	    late_start = MIN (late_start, latest);
 
-	    }
-          else if (dump_file)
-            fprintf (dump_file, "the node is not scheduled\n");
+	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
+	      count_succs++;
+	  }
+      }
 
-	}
-      start = late_start;
-      end = MAX (end, late_start - ii);
-      /* Schedule the node close to it's successors.  */
+  if (dump_file && (psp_not_empty || pss_not_empty))
+    {
+      fprintf (dump_file, "----------- ----------- ----------- -----------"
+	       " -----\n");
+      fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
+	       start, early_start, late_start, end, "",
+	       "(max, max, min, min)");
+    }
+
+  /* Get a target scheduling window no bigger than ii.  */
+  if (early_start == INT_MIN && late_start == INT_MAX)
+    early_start = NODE_ASAP (u_node);
+  else if (early_start == INT_MIN)
+    early_start = late_start - (ii - 1);
+  late_start = MIN (late_start, early_start + (ii - 1));
+
+  /* Apply memory dependence limits.  */
+  start = MAX (start, early_start);
+  end = MIN (end, late_start);
+
+  if (dump_file && (psp_not_empty || pss_not_empty))
+    fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
+	     "", start, end, "", "");
+
+  /* If there are at least as many successors as predecessors, schedule the
+     node close to its successors.  */
+  if (pss_not_empty && count_succs >= count_preds)
+    {
+      int tmp = end;
+      end = start;
+      start = tmp;
       step = -1;
-
-      if (dump_file)
-        fprintf (dump_file,
-                 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
-                 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
-
     }
 
-  else if (psp_not_empty && pss_not_empty)
-    {
-      int early_start = INT_MIN;
-      int late_start = INT_MAX;
-      int count_preds = 0;
-      int count_succs = 0;
-
-      start = INT_MIN;
-      end = INT_MAX;
-      for (e = u_node->in; e != 0; e = e->next_in)
-	{
-	  ddg_node_ptr v_node = e->src;
-
-	  if (dump_file)
-	    {
-              fprintf (dump_file, "\nProcessing edge:");
-              print_ddg_edge (dump_file, e);
-	      fprintf (dump_file,
-		       "\nScheduling %d (%d) in psp_pss_not_empty,"
-		       " checking p %d (%d): ", u_node->cuid, INSN_UID
-		       (u_node->insn), v_node->cuid, INSN_UID
-		       (v_node->insn));
-	    }
-
-	  if (TEST_BIT (sched_nodes, v_node->cuid))
-	    {
-              int p_st = SCHED_TIME (v_node);
-
-	      early_start = MAX (early_start,
-				 p_st + e->latency
-				 - (e->distance * ii));
-
-              if (dump_file)
-                fprintf (dump_file,
-                         "pred st = %d; early_start = %d; latency = %d",
-                         p_st, early_start, e->latency);
-
-              if (e->type == TRUE_DEP && e->data_type == REG_DEP)
-                count_preds++;
-
-	      if (e->data_type == MEM_DEP)
-		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
-	    }
-          else if (dump_file)
-            fprintf (dump_file, "the node is not scheduled\n");
-
-	}
-      for (e = u_node->out; e != 0; e = e->next_out)
-	{
-	  ddg_node_ptr v_node = e->dest;
-
-	  if (dump_file)
-	    {
-              fprintf (dump_file, "\nProcessing edge:");
-              print_ddg_edge (dump_file, e);
-	      fprintf (dump_file,
-		       "\nScheduling %d (%d) in psp_pss_not_empty,"
-		       " checking s %d (%d): ", u_node->cuid, INSN_UID
-		       (u_node->insn), v_node->cuid, INSN_UID
-		       (v_node->insn));
-	    }
-
-	  if (TEST_BIT (sched_nodes, v_node->cuid))
-	    {
-              int s_st = SCHED_TIME (v_node);
-
-	      late_start = MIN (late_start,
-				s_st - e->latency
-				+ (e->distance * ii));
-
-              if (dump_file)
-                fprintf (dump_file,
-                         "succ st = %d; late_start = %d; latency = %d",
-                         s_st, late_start, e->latency);
-
-               if (e->type == TRUE_DEP && e->data_type == REG_DEP)
-                 count_succs++;
-
-	      if (e->data_type == MEM_DEP)
-		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
-	    }
-          else if (dump_file)
-            fprintf (dump_file, "the node is not scheduled\n");
-
-	}
-      start = MAX (start, early_start);
-      end = MIN (end, MIN (early_start + ii, late_start + 1));
-      step = 1;
-      /* If there are more successors than predecessors schedule the
-         node close to it's successors.  */
-      if (count_succs >= count_preds)
-        {
-          int old_start = start;
-
-          start = end - 1;
-          end = old_start - 1;
-          step = -1;
-        }
-    }
-  else /* psp is empty && pss is empty.  */
-    {
-      start = SCHED_ASAP (u_node);
-      end = start + ii;
-      step = 1;
-    }
+  /* Now that we've finalized the window, make END an exclusive rather
+     than an inclusive bound.  */
+  end += step;
 
   *start_p = start;
   *step_p = step;
@@ -1587,10 +1997,10 @@
       if (dump_file)
 	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
 		 start, end, step);
-    return -1;
+      return -1;
     }
 
-    return 0;
+  return 0;
 }
 
 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
@@ -1646,7 +2056,7 @@
       SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
   for (e = u_node->in; e != 0; e = e->next_in)
     if (TEST_BIT (sched_nodes, e->src->cuid)
-	&& ((SCHED_TIME (e->src) - (e->distance * ii)) ==
+	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
              first_cycle_in_window))
       {
 	if (dump_file)
@@ -1671,7 +2081,7 @@
       SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
   for (e = u_node->out; e != 0; e = e->next_out)
     if (TEST_BIT (sched_nodes, e->dest->cuid)
-	&& ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
+	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
              last_cycle_in_window))
       {
 	if (dump_file)
@@ -1695,7 +2105,7 @@
    last row of the scheduling window)  */
 
 static bool
-try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
+try_scheduling_node_in_cycle (partial_schedule_ptr ps,
 			      int u, int cycle, sbitmap sched_nodes,
 			      int *num_splits, sbitmap must_precede,
 			      sbitmap must_follow)
@@ -1704,11 +2114,10 @@
   bool success = 0;
 
   verify_partial_schedule (ps, sched_nodes);
-  psi = ps_add_node_check_conflicts (ps, u_node, cycle,
-				     must_precede, must_follow);
+  psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
   if (psi)
     {
-      SCHED_TIME (u_node) = cycle;
+      SCHED_TIME (u) = cycle;
       SET_BIT (sched_nodes, u);
       success = 1;
       *num_splits = 0;
@@ -1760,23 +2169,17 @@
 	      continue;
 	    }
 
-	  if (JUMP_P (insn)) /* Closing branch handled later.  */
-	    {
-	      RESET_BIT (tobe_scheduled, u);
-	      continue;
-	    }
-
 	  if (TEST_BIT (sched_nodes, u))
 	    continue;
 
 	  /* Try to get non-empty scheduling window.  */
 	 success = 0;
-         if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+         if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
                                 &step, &end) == 0)
             {
               if (dump_file)
-                fprintf (dump_file, "\nTrying to schedule node %d \
-                        INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
+                fprintf (dump_file, "\nTrying to schedule node %d "
+			 "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
                         (g->nodes[u].insn)), start, end, step);
 
               gcc_assert ((step > 0 && start < end)
@@ -1788,26 +2191,13 @@
 
               for (c = start; c != end; c += step)
                 {
-                  sbitmap tmp_precede = NULL;
-                  sbitmap tmp_follow = NULL;
-
-                  if (c == start)
-                    {
-                      if (step == 1)
-                        tmp_precede = must_precede;
-                      else      /* step == -1.  */
-                        tmp_follow = must_follow;
-                    }
-                  if (c == end - step)
-                    {
-                      if (step == 1)
-                        tmp_follow = must_follow;
-                      else      /* step == -1.  */
-                        tmp_precede = must_precede;
-                    }
+		  sbitmap tmp_precede, tmp_follow;
 
+                  set_must_precede_follow (&tmp_follow, must_follow, 
+		                           &tmp_precede, must_precede, 
+                                           c, start, end, step);
                   success =
-                    try_scheduling_node_in_cycle (ps, u_node, u, c,
+                    try_scheduling_node_in_cycle (ps, u, c,
                                                   sched_nodes,
                                                   &num_splits, tmp_precede,
                                                   tmp_follow);
@@ -1883,6 +2273,7 @@
   int ii = ps->ii;
   int new_ii = ii + 1;
   int row;
+  int *rows_length_new;
 
   verify_partial_schedule (ps, sched_nodes);
 
@@ -1893,18 +2284,20 @@
   if (dump_file)
     fprintf (dump_file, "split_row=%d\n", split_row);
 
-  normalize_sched_times (ps);
-  rotate_partial_schedule (ps, ps->min_cycle);
+  reset_sched_times (ps, PS_MIN_CYCLE (ps));
+  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
 
   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
+  rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
   for (row = 0; row < split_row; row++)
     {
       rows_new[row] = ps->rows[row];
+      rows_length_new[row] = ps->rows_length[row];
       ps->rows[row] = NULL;
       for (crr_insn = rows_new[row];
 	   crr_insn; crr_insn = crr_insn->next_in_row)
 	{
-	  ddg_node_ptr u = crr_insn->node;
+	  int u = crr_insn->id;
 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
 
 	  SCHED_TIME (u) = new_time;
@@ -1920,11 +2313,12 @@
   for (row = split_row; row < ii; row++)
     {
       rows_new[row + 1] = ps->rows[row];
+      rows_length_new[row + 1] = ps->rows_length[row];
       ps->rows[row] = NULL;
       for (crr_insn = rows_new[row + 1];
 	   crr_insn; crr_insn = crr_insn->next_in_row)
 	{
-	  ddg_node_ptr u = crr_insn->node;
+	  int u = crr_insn->id;
 	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
 
 	  SCHED_TIME (u) = new_time;
@@ -1941,6 +2335,8 @@
     + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
   free (ps->rows);
   ps->rows = rows_new;
+  free (ps->rows_length);
+  ps->rows_length = rows_length_new;
   ps->ii = new_ii;
   gcc_assert (ps->min_cycle >= 0);
 
@@ -1962,24 +2358,24 @@
 {
   ddg_edge_ptr e;
   int lower = INT_MIN, upper = INT_MAX;
-  ddg_node_ptr crit_pred = NULL;
-  ddg_node_ptr crit_succ = NULL;
+  int crit_pred = -1;
+  int crit_succ = -1;
   int crit_cycle;
 
   for (e = u_node->in; e != 0; e = e->next_in)
     {
-      ddg_node_ptr v_node = e->src;
+      int v = e->src->cuid;
 
-      if (TEST_BIT (sched_nodes, v_node->cuid)
-	  && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
-	if (SCHED_TIME (v_node) > lower)
+      if (TEST_BIT (sched_nodes, v)
+	  && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
+	if (SCHED_TIME (v) > lower)
 	  {
-	    crit_pred = v_node;
-	    lower = SCHED_TIME (v_node);
+	    crit_pred = v;
+	    lower = SCHED_TIME (v);
 	  }
     }
 
-  if (crit_pred != NULL)
+  if (crit_pred >= 0)
     {
       crit_cycle = SCHED_TIME (crit_pred) + 1;
       return SMODULO (crit_cycle, ii);
@@ -1987,17 +2383,18 @@
 
   for (e = u_node->out; e != 0; e = e->next_out)
     {
-      ddg_node_ptr v_node = e->dest;
-      if (TEST_BIT (sched_nodes, v_node->cuid)
-	  && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
-	if (SCHED_TIME (v_node) < upper)
+      int v = e->dest->cuid;
+
+      if (TEST_BIT (sched_nodes, v)
+	  && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
+	if (SCHED_TIME (v) < upper)
 	  {
-	    crit_succ = v_node;
-	    upper = SCHED_TIME (v_node);
+	    crit_succ = v;
+	    upper = SCHED_TIME (v);
 	  }
     }
 
-  if (crit_succ != NULL)
+  if (crit_succ >= 0)
     {
       crit_cycle = SCHED_TIME (crit_succ);
       return SMODULO (crit_cycle, ii);
@@ -2016,16 +2413,23 @@
   ps_insn_ptr crr_insn;
 
   for (row = 0; row < ps->ii; row++)
-    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
-      {
-	ddg_node_ptr u = crr_insn->node;
-
-	gcc_assert (TEST_BIT (sched_nodes, u->cuid));
-	/* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
-	   popcount (sched_nodes) == number of insns in ps.  */
-	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
-	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
-      }
+    {
+      int length = 0;
+      
+      for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
+	{
+	  int u = crr_insn->id;
+	  
+	  length++;
+	  gcc_assert (TEST_BIT (sched_nodes, u));
+	  /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
+	     popcount (sched_nodes) == number of insns in ps.  */
+	  gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
+	  gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
+	}
+      
+      gcc_assert (ps->rows_length[row] == length);
+    }
 }
 
 
@@ -2431,6 +2835,8 @@
 {
   partial_schedule_ptr ps = XNEW (struct partial_schedule);
   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
+  ps->rows_length = (int *) xcalloc (ii, sizeof (int));
+  ps->reg_moves = NULL;
   ps->ii = ii;
   ps->history = history;
   ps->min_cycle = INT_MAX;
@@ -2465,10 +2871,19 @@
 static void
 free_partial_schedule (partial_schedule_ptr ps)
 {
+  ps_reg_move_info *move;
+  unsigned int i;
+
   if (!ps)
     return;
+
+  FOR_EACH_VEC_ELT (ps_reg_move_info, ps->reg_moves, i, move)
+    sbitmap_free (move->uses);
+  VEC_free (ps_reg_move_info, heap, ps->reg_moves);
+
   free_ps_insns (ps);
   free (ps->rows);
+  free (ps->rows_length);
   free (ps);
 }
 
@@ -2486,6 +2901,8 @@
   ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
 						 * sizeof (ps_insn_ptr));
   memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
+  ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
+  memset (ps->rows_length, 0, new_ii * sizeof (int));
   ps->ii = new_ii;
   ps->min_cycle = INT_MAX;
   ps->max_cycle = INT_MIN;
@@ -2505,8 +2922,13 @@
       fprintf (dump, "\n[ROW %d ]: ", i);
       while (ps_i)
 	{
-	  fprintf (dump, "%d, ",
-		   INSN_UID (ps_i->node->insn));
+	  rtx insn = ps_rtl_insn (ps, ps_i->id);
+
+	  if (JUMP_P (insn))
+	    fprintf (dump, "%d (branch), ", INSN_UID (insn));
+	  else
+	    fprintf (dump, "%d, ", INSN_UID (insn));
+	
 	  ps_i = ps_i->next_in_row;
 	}
     }
@@ -2514,36 +2936,31 @@
 
 /* Creates an object of PS_INSN and initializes it to the given parameters.  */
 static ps_insn_ptr
-create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
+create_ps_insn (int id, int cycle)
 {
   ps_insn_ptr ps_i = XNEW (struct ps_insn);
 
-  ps_i->node = node;
+  ps_i->id = id;
   ps_i->next_in_row = NULL;
   ps_i->prev_in_row = NULL;
-  ps_i->row_rest_count = rest_count;
   ps_i->cycle = cycle;
 
   return ps_i;
 }
 
 
-/* Removes the given PS_INSN from the partial schedule.  Returns false if the
-   node is not found in the partial schedule, else returns true.  */
-static bool
+/* Removes the given PS_INSN from the partial schedule.  */  
+static void 
 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
 {
   int row;
 
-  if (!ps || !ps_i)
-    return false;
-
+  gcc_assert (ps && ps_i);
+  
   row = SMODULO (ps_i->cycle, ps->ii);
   if (! ps_i->prev_in_row)
     {
-      if (ps_i != ps->rows[row])
-	return false;
-
+      gcc_assert (ps_i == ps->rows[row]);
       ps->rows[row] = ps_i->next_in_row;
       if (ps->rows[row])
 	ps->rows[row]->prev_in_row = NULL;
@@ -2554,8 +2971,10 @@
       if (ps_i->next_in_row)
 	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
     }
+   
+  ps->rows_length[row] -= 1; 
   free (ps_i);
-  return true;
+  return;
 }
 
 /* Unlike what literature describes for modulo scheduling (which focuses
@@ -2571,6 +2990,7 @@
   ps_insn_ptr next_ps_i;
   ps_insn_ptr first_must_follow = NULL;
   ps_insn_ptr last_must_precede = NULL;
+  ps_insn_ptr last_in_row = NULL;
   int row;
 
   if (! ps_i)
@@ -2585,10 +3005,11 @@
        next_ps_i;
        next_ps_i = next_ps_i->next_in_row)
     {
-      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
+      if (must_follow
+	  && TEST_BIT (must_follow, next_ps_i->id)
 	  && ! first_must_follow)
         first_must_follow = next_ps_i;
-      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
+      if (must_precede && TEST_BIT (must_precede, next_ps_i->id))
         {
           /* If we have already met a node that must follow, then
 	     there is no possible column.  */
@@ -2597,8 +3018,37 @@
 	  else
             last_must_precede = next_ps_i;
         }
+      /* The closing branch must be the last in the row.  */
+      if (must_precede 
+	  && TEST_BIT (must_precede, next_ps_i->id)
+	  && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
+	return false;
+             
+       last_in_row = next_ps_i;
     }
 
+  /* The closing branch is scheduled as well.  Make sure there is no
+     dependent instruction after it as the branch should be the last
+     instruction in the row.  */
+  if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
+    {
+      if (first_must_follow)
+	return false;
+      if (last_in_row)
+	{
+	  /* Make the branch the last in the row.  New instructions
+	     will be inserted at the beginning of the row or after the
+	     last must_precede instruction thus the branch is guaranteed
+	     to remain the last instruction in the row.  */
+	  last_in_row->next_in_row = ps_i;
+	  ps_i->prev_in_row = last_in_row;
+	  ps_i->next_in_row = NULL;
+	}
+      else
+	ps->rows[row] = ps_i;
+      return true;
+    }
+  
   /* Now insert the node after INSERT_AFTER_PSI.  */
 
   if (! last_must_precede)
@@ -2631,7 +3081,6 @@
 {
   ps_insn_ptr prev, next;
   int row;
-  ddg_node_ptr next_node;
 
   if (!ps || !ps_i)
     return false;
@@ -2641,11 +3090,9 @@
   if (! ps_i->next_in_row)
     return false;
 
-  next_node = ps_i->next_in_row->node;
-
   /* Check if next_in_row is dependent on ps_i, both having same sched
      times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
-  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
+  if (must_follow && TEST_BIT (must_follow, ps_i->next_in_row->id))
     return false;
 
   /* Advance PS_I over its next_in_row in the doubly linked list.  */
@@ -2676,21 +3123,16 @@
    before/after (respectively) the node pointed to by PS_I when scheduled
    in the same cycle.  */
 static ps_insn_ptr
-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
+add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
 		sbitmap must_precede, sbitmap must_follow)
 {
   ps_insn_ptr ps_i;
-  int rest_count = 1;
   int row = SMODULO (cycle, ps->ii);
 
-  if (ps->rows[row]
-      && ps->rows[row]->row_rest_count >= issue_rate)
+  if (ps->rows_length[row] >= issue_rate)
     return NULL;
 
-  if (ps->rows[row])
-    rest_count += ps->rows[row]->row_rest_count;
-
-  ps_i = create_ps_insn (node, rest_count, cycle);
+  ps_i = create_ps_insn (id, cycle);
 
   /* Finds and inserts PS_I according to MUST_FOLLOW and
      MUST_PRECEDE.  */
@@ -2700,6 +3142,7 @@
       return NULL;
     }
 
+  ps->rows_length[row] += 1;
   return ps_i;
 }
 
@@ -2741,7 +3184,7 @@
 	   crr_insn;
 	   crr_insn = crr_insn->next_in_row)
 	{
-	  rtx insn = crr_insn->node->insn;
+	  rtx insn = ps_rtl_insn (ps, crr_insn->id);
 
 	  if (!NONDEBUG_INSN_P (insn))
 	    continue;
@@ -2778,7 +3221,7 @@
    cuid N must be come before/after (respectively) the node pointed to by
    PS_I when scheduled in the same cycle.  */
 ps_insn_ptr
-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
+ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
    			     int c, sbitmap must_precede,
 			     sbitmap must_follow)
 {
@@ -2820,6 +3263,22 @@
   return ps_i;
 }
 
+/* Calculate the stage count of the partial schedule PS.  The calculation
+   takes into account the rotation amount passed in ROTATION_AMOUNT.  */
+int
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
+{
+  int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
+  int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
+  int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
+
+  /* The calculation of stage count is done adding the number of stages
+     before cycle zero and after cycle zero.  */ 
+  stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
+
+  return stage_count;
+}
+
 /* Rotate the rows of PS such that insns scheduled at time
    START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
 void
@@ -2837,11 +3296,16 @@
   for (i = 0; i < backward_rotates; i++)
     {
       ps_insn_ptr first_row = ps->rows[0];
+      int first_row_length = ps->rows_length[0];
 
       for (row = 0; row < last_row; row++)
-	ps->rows[row] = ps->rows[row+1];
+	{
+	  ps->rows[row] = ps->rows[row + 1];
+	  ps->rows_length[row] = ps->rows_length[row + 1]; 
+	}
 
       ps->rows[last_row] = first_row;
+      ps->rows_length[last_row] = first_row_length;
     }
 
   ps->max_cycle -= start_cycle;
--- a/src/gcc/optabs.c
+++ b/src/gcc/optabs.c
@@ -225,6 +225,61 @@
   return 1;
 }
 
+/* Given two input operands, OP0 and OP1, determine what the correct from_mode
+   for a widening operation would be.  In most cases this would be OP0, but if
+   that's a constant it'll be VOIDmode, which isn't useful.  */
+
+static enum machine_mode
+widened_mode (enum machine_mode to_mode, rtx op0, rtx op1)
+{
+  enum machine_mode m0 = GET_MODE (op0);
+  enum machine_mode m1 = GET_MODE (op1);
+  enum machine_mode result;
+
+  if (m0 == VOIDmode && m1 == VOIDmode)
+    return to_mode;
+  else if (m0 == VOIDmode || GET_MODE_SIZE (m0) < GET_MODE_SIZE (m1))
+    result = m1;
+  else
+    result = m0;
+
+  if (GET_MODE_SIZE (result) > GET_MODE_SIZE (to_mode))
+    return to_mode;
+
+  return result;
+}
+
+/* Find a widening optab even if it doesn't widen as much as we want.
+   E.g. if from_mode is HImode, and to_mode is DImode, and there is no
+   direct HI->SI insn, then return SI->DI, if that exists.
+   If PERMIT_NON_WIDENING is non-zero then this can be used with
+   non-widening optabs also.  */
+
+enum insn_code
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+				      enum machine_mode from_mode,
+				      int permit_non_widening,
+				      enum machine_mode *found_mode)
+{
+  for (; (permit_non_widening || from_mode != to_mode)
+	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
+	 && from_mode != VOIDmode;
+       from_mode = GET_MODE_WIDER_MODE (from_mode))
+    {
+      enum insn_code handler = widening_optab_handler (op, to_mode,
+						       from_mode);
+
+      if (handler != CODE_FOR_nothing)
+	{
+	  if (found_mode)
+	    *found_mode = from_mode;
+	  return handler;
+	}
+    }
+
+  return CODE_FOR_nothing;
+}
+
 /* Widen OP to MODE and return the rtx for the widened operand.  UNSIGNEDP
    says whether OP is signed or unsigned.  NO_EXTEND is nonzero if we need
    not actually do a sign-extend or zero-extend, but can leave the
@@ -399,6 +454,14 @@
       return TYPE_UNSIGNED (type) ?
 	vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
 
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+      return TYPE_UNSIGNED (type) ?
+        vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
+
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
+      return TYPE_UNSIGNED (type) ?
+        vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
+
     case VEC_UNPACK_HI_EXPR:
       return TYPE_UNSIGNED (type) ?
 	vec_unpacku_hi_optab : vec_unpacks_hi_optab;
@@ -517,8 +580,9 @@
     optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
   if (ops->code == WIDEN_MULT_PLUS_EXPR
       || ops->code == WIDEN_MULT_MINUS_EXPR)
-    icode = (int) optab_handler (widen_pattern_optab,
-				 TYPE_MODE (TREE_TYPE (ops->op2)));
+    icode = (int) find_widening_optab_handler (widen_pattern_optab,
+					       TYPE_MODE (TREE_TYPE (ops->op2)),
+					       tmode0, 0);
   else
     icode = (int) optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
@@ -1389,7 +1453,9 @@
 		       rtx target, int unsignedp, enum optab_methods methods,
 		       rtx last)
 {
-  int icode = (int) optab_handler (binoptab, mode);
+  enum machine_mode from_mode = widened_mode (mode, op0, op1);
+  int icode = (int) find_widening_optab_handler (binoptab, mode,
+						 from_mode, 1);
   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
   enum machine_mode tmp_mode;
@@ -1546,7 +1612,9 @@
   /* If we can do it with a three-operand insn, do so.  */
 
   if (methods != OPTAB_MUST_WIDEN
-      && optab_handler (binoptab, mode) != CODE_FOR_nothing)
+      && find_widening_optab_handler (binoptab, mode,
+				      widened_mode (mode, op0, op1), 1)
+	    != CODE_FOR_nothing)
     {
       temp = expand_binop_directly (mode, binoptab, op0, op1, target,
 				    unsignedp, methods, last);
@@ -1586,8 +1654,9 @@
 
   if (binoptab == smul_optab
       && GET_MODE_WIDER_MODE (mode) != VOIDmode
-      && (optab_handler ((unsignedp ? umul_widen_optab : smul_widen_optab),
-			 GET_MODE_WIDER_MODE (mode))
+      && (widening_optab_handler ((unsignedp ? umul_widen_optab
+					     : smul_widen_optab),
+				  GET_MODE_WIDER_MODE (mode), mode)
 	  != CODE_FOR_nothing))
     {
       temp = expand_binop (GET_MODE_WIDER_MODE (mode),
@@ -1618,9 +1687,11 @@
 	if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing
 	    || (binoptab == smul_optab
 		&& GET_MODE_WIDER_MODE (wider_mode) != VOIDmode
-		&& (optab_handler ((unsignedp ? umul_widen_optab
-				    : smul_widen_optab),
-				   GET_MODE_WIDER_MODE (wider_mode))
+		&& (find_widening_optab_handler ((unsignedp
+						  ? umul_widen_optab
+						  : smul_widen_optab),
+						 GET_MODE_WIDER_MODE (wider_mode),
+						 mode, 0)
 		    != CODE_FOR_nothing)))
 	  {
 	    rtx xop0 = op0, xop1 = op1;
@@ -2043,8 +2114,8 @@
       && optab_handler (add_optab, word_mode) != CODE_FOR_nothing)
     {
       rtx product = NULL_RTX;
-
-      if (optab_handler (umul_widen_optab, mode) != CODE_FOR_nothing)
+      if (widening_optab_handler (umul_widen_optab, mode, word_mode)
+	    != CODE_FOR_nothing)
 	{
 	  product = expand_doubleword_mult (mode, op0, op1, target,
 					    true, methods);
@@ -2053,7 +2124,8 @@
 	}
 
       if (product == NULL_RTX
-	  && optab_handler (smul_widen_optab, mode) != CODE_FOR_nothing)
+	  && widening_optab_handler (smul_widen_optab, mode, word_mode)
+		!= CODE_FOR_nothing)
 	{
 	  product = expand_doubleword_mult (mode, op0, op1, target,
 					    false, methods);
@@ -2144,7 +2216,8 @@
 	   wider_mode != VOIDmode;
 	   wider_mode = GET_MODE_WIDER_MODE (wider_mode))
 	{
-	  if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing
+	  if (find_widening_optab_handler (binoptab, wider_mode, mode, 1)
+		  != CODE_FOR_nothing
 	      || (methods == OPTAB_LIB
 		  && optab_libfunc (binoptab, wider_mode)))
 	    {
@@ -6171,6 +6244,9 @@
   init_optab (usashl_optab, US_ASHIFT);
   init_optab (ashr_optab, ASHIFTRT);
   init_optab (lshr_optab, LSHIFTRT);
+  init_optabv (vashl_optab, ASHIFT);
+  init_optabv (vashr_optab, ASHIFTRT);
+  init_optabv (vlshr_optab, LSHIFTRT);
   init_optab (rotl_optab, ROTATE);
   init_optab (rotr_optab, ROTATERT);
   init_optab (smin_optab, SMIN);
@@ -6283,6 +6359,10 @@
   init_optab (vec_widen_umult_lo_optab, UNKNOWN);
   init_optab (vec_widen_smult_hi_optab, UNKNOWN);
   init_optab (vec_widen_smult_lo_optab, UNKNOWN);
+  init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN);
+  init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN);
+  init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN);
+  init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN);
   init_optab (vec_unpacks_hi_optab, UNKNOWN);
   init_optab (vec_unpacks_lo_optab, UNKNOWN);
   init_optab (vec_unpacku_hi_optab, UNKNOWN);
--- a/src/gcc/optabs.h
+++ b/src/gcc/optabs.h
@@ -42,6 +42,11 @@
   int insn_code;
 };
 
+struct widening_optab_handlers
+{
+  struct optab_handlers handlers[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
+};
+
 struct optab_d
 {
   enum rtx_code code;
@@ -50,6 +55,7 @@
   void (*libcall_gen)(struct optab_d *, const char *name, char suffix,
 		      enum machine_mode);
   struct optab_handlers handlers[NUM_MACHINE_MODES];
+  struct widening_optab_handlers *widening;
 };
 typedef struct optab_d * optab;
 
@@ -344,6 +350,12 @@
   OTI_vec_widen_umult_lo,
   OTI_vec_widen_smult_hi,
   OTI_vec_widen_smult_lo,
+  /* Widening shift left.
+     The high/low part of the resulting vector is returned.  */
+  OTI_vec_widen_ushiftl_hi,
+  OTI_vec_widen_ushiftl_lo,
+  OTI_vec_widen_sshiftl_hi,
+  OTI_vec_widen_sshiftl_lo,
   /* Extract and widen the high/low part of a vector of signed or
      floating point elements.  */
   OTI_vec_unpacks_hi,
@@ -536,6 +548,10 @@
 #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo])
 #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi])
 #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo])
+#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi])
+#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo])
+#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi])
+#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo])
 #define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi])
 #define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo])
 #define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi])
@@ -578,6 +594,9 @@
   COI_satfract,
   COI_satfractuns,
 
+  COI_vec_load_lanes,
+  COI_vec_store_lanes,
+
   COI_MAX
 };
 
@@ -598,6 +617,8 @@
 #define fractuns_optab (&convert_optab_table[COI_fractuns])
 #define satfract_optab (&convert_optab_table[COI_satfract])
 #define satfractuns_optab (&convert_optab_table[COI_satfractuns])
+#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes])
+#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes])
 
 /* Contains the optab used for each rtx code.  */
 extern optab code_to_optab[NUM_RTX_CODE + 1];
@@ -794,6 +815,15 @@
 extern void emit_unop_insn (int, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (int, rtx, rtx, enum rtx_code);
 
+/* Find a widening optab even if it doesn't widen as much as we want.  */
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+							    enum machine_mode,
+							    enum machine_mode,
+							    int,
+							    enum machine_mode *);
+
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
    distinguish between machines with a vector shift that takes a scalar for the
    shift amount vs. machines that take a vector for the shift amount.  */
@@ -869,6 +899,23 @@
 			   + (int) CODE_FOR_nothing);
 }
 
+/* Like optab_handler, but for widening_operations that have a TO_MODE and
+  a FROM_MODE.  */
+
+static inline enum insn_code
+widening_optab_handler (optab op, enum machine_mode to_mode,
+			enum machine_mode from_mode)
+{
+  if (to_mode == from_mode || from_mode == VOIDmode)
+    return optab_handler (op, to_mode);
+
+  if (op->widening)
+    return (enum insn_code) (op->widening->handlers[(int) to_mode][(int) from_mode].insn_code
+			     + (int) CODE_FOR_nothing);
+
+  return CODE_FOR_nothing;
+}
+
 /* Record that insn CODE should be used to implement mode MODE of OP.  */
 
 static inline void
@@ -877,6 +924,26 @@
   op->handlers[(int) mode].insn_code = (int) code - (int) CODE_FOR_nothing;
 }
 
+/* Like set_optab_handler, but for widening operations that have a TO_MODE
+   and a FROM_MODE.  */
+
+static inline void
+set_widening_optab_handler (optab op, enum machine_mode to_mode,
+			    enum machine_mode from_mode, enum insn_code code)
+{
+  if (to_mode == from_mode)
+    set_optab_handler (op, to_mode, code);
+  else
+    {
+      if (op->widening == NULL)
+	op->widening = (struct widening_optab_handlers *)
+	      xcalloc (1, sizeof (struct widening_optab_handlers));
+
+      op->widening->handlers[(int) to_mode][(int) from_mode].insn_code
+	  = (int) code - (int) CODE_FOR_nothing;
+    }
+}
+
 /* Return the insn used to perform conversion OP from mode FROM_MODE
    to mode TO_MODE; return CODE_FOR_nothing if the target does not have
    such an insn.  */
--- a/src/gcc/opts.c
+++ b/src/gcc/opts.c
@@ -823,6 +823,12 @@
 	  opts->x_flag_split_stack = 0;
 	}
     }
+
+  /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
+     is disabled.  */
+  if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert)
+    maybe_set_param_value (PARAM_MAX_STORES_TO_SINK, 0,
+                           opts->x_param_values, opts_set->x_param_values);
 }
 
 #define LEFT_COLUMN	27
--- a/src/gcc/params.def
+++ b/src/gcc/params.def
@@ -344,6 +344,11 @@
 	 "sms-max-ii-factor",
 	 "A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop",
 	 100, 0, 0)
+/* The minimum value of stage count that swing modulo scheduler will generate.  */
+DEFPARAM(PARAM_SMS_MIN_SC,
+        "sms-min-sc",
+        "The minimum value of stage count that swing modulo scheduler will generate.",
+        2, 1, 1)
 DEFPARAM(PARAM_SMS_DFA_HISTORY,
 	 "sms-dfa-history",
 	 "The number of cycles the swing modulo scheduler considers when checking conflicts using DFA",
@@ -883,6 +888,13 @@
 	  "name lookup fails",
 	  1000, 0, 0)
 
+/* Maximum number of conditional store pairs that can be sunk.  */
+DEFPARAM (PARAM_MAX_STORES_TO_SINK,
+          "max-stores-to-sink",
+          "Maximum number of conditional store pairs that can be sunk",
+          2, 0, 0)
+
+
 /*
 Local variables:
 mode:c
--- a/src/gcc/params.h
+++ b/src/gcc/params.h
@@ -206,4 +206,6 @@
   PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO)
 #define MIN_NONDEBUG_INSN_UID \
   PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
+#define MAX_STORES_TO_SINK \
+  PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
 #endif /* ! GCC_PARAMS_H */
--- a/src/gcc/recog.c
+++ b/src/gcc/recog.c
@@ -930,7 +930,9 @@
     return ((GET_MODE (op) == VOIDmode || GET_MODE (op) == mode
 	     || mode == VOIDmode)
 	    && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op))
-	    && LEGITIMATE_CONSTANT_P (op));
+	    && targetm.legitimate_constant_p (mode == VOIDmode
+					      ? GET_MODE (op)
+					      : mode, op));
 
   /* Except for certain constants with VOIDmode, already checked for,
      OP's mode must match MODE if MODE specifies a mode.  */
@@ -1107,7 +1109,9 @@
 	  && (GET_MODE (op) == mode || mode == VOIDmode
 	      || GET_MODE (op) == VOIDmode)
 	  && (! flag_pic || LEGITIMATE_PIC_OPERAND_P (op))
-	  && LEGITIMATE_CONSTANT_P (op));
+	  && targetm.legitimate_constant_p (mode == VOIDmode
+					    ? GET_MODE (op)
+					    : mode, op));
 }
 
 /* Returns 1 if OP is an operand that is a CONST_INT.  */
--- a/src/gcc/regcprop.c
+++ b/src/gcc/regcprop.c
@@ -418,10 +418,9 @@
 
       offset = ((WORDS_BIG_ENDIAN ? wordoffset : 0)
 		+ (BYTES_BIG_ENDIAN ? byteoffset : 0));
-      return gen_rtx_raw_REG (new_mode,
-			      regno + subreg_regno_offset (regno, orig_mode,
-							   offset,
-							   new_mode));
+      regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
+      if (HARD_REGNO_MODE_OK (regno, new_mode))
+	return gen_rtx_raw_REG (new_mode, regno);
     }
   return NULL_RTX;
 }
--- a/src/gcc/reload1.c
+++ b/src/gcc/reload1.c
@@ -4159,6 +4159,9 @@
 		}
 	      else if (function_invariant_p (x))
 		{
+		  enum machine_mode mode;
+
+		  mode = GET_MODE (SET_DEST (set));
 		  if (GET_CODE (x) == PLUS)
 		    {
 		      /* This is PLUS of frame pointer and a constant,
@@ -4171,12 +4174,11 @@
 		      reg_equiv_invariant[i] = x;
 		      num_eliminable_invariants++;
 		    }
-		  else if (LEGITIMATE_CONSTANT_P (x))
+		  else if (targetm.legitimate_constant_p (mode, x))
 		    reg_equiv_constant[i] = x;
 		  else
 		    {
-		      reg_equiv_memory_loc[i]
-			= force_const_mem (GET_MODE (SET_DEST (set)), x);
+		      reg_equiv_memory_loc[i] = force_const_mem (mode, x);
 		      if (! reg_equiv_memory_loc[i])
 			reg_equiv_init[i] = NULL_RTX;
 		    }
@@ -4478,6 +4480,43 @@
 	}
     }
 }
+
+/* *OP_PTR and *OTHER_PTR are two operands to a conceptual reload.
+   If *OP_PTR is a paradoxical subreg, try to remove that subreg
+   and apply the corresponding narrowing subreg to *OTHER_PTR.
+   Return true if the operands were changed, false otherwise.  */
+
+static bool
+strip_paradoxical_subreg (rtx *op_ptr, rtx *other_ptr)
+{
+  rtx op, inner, other, tem;
+
+  op = *op_ptr;
+  if (GET_CODE (op) != SUBREG)
+    return false;
+
+  inner = SUBREG_REG (op);
+  if (GET_MODE_SIZE (GET_MODE (op)) <= GET_MODE_SIZE (GET_MODE (inner)))
+    return false;
+
+  other = *other_ptr;
+  tem = gen_lowpart_common (GET_MODE (inner), other);
+  if (!tem)
+    return false;
+
+  /* If the lowpart operation turned a hard register into a subreg,
+     rather than simplifying it to another hard register, then the
+     mode change cannot be properly represented.  For example, OTHER
+     might be valid in its current mode, but not in the new one.  */
+  if (GET_CODE (tem) == SUBREG
+      && REG_P (other)
+      && HARD_REGISTER_P (other))
+    return false;
+
+  *op_ptr = inner;
+  *other_ptr = tem;
+  return true;
+}
 
 /* A subroutine of reload_as_needed.  If INSN has a REG_EH_REGION note,
    examine all of the reload insns between PREV and NEXT exclusive, and
@@ -5558,7 +5597,7 @@
      chain reloads or do need an intermediate hard registers.  */
   bool result = true;
   int regno, n, code;
-  rtx out, in, tem, insn;
+  rtx out, in, insn;
   rtx last = get_last_insn ();
 
   /* Make r2 a component of r1.  */
@@ -5577,11 +5616,7 @@
 
   /* If IN is a paradoxical SUBREG, remove it and try to put the
      opposite SUBREG on OUT.  Likewise for a paradoxical SUBREG on OUT.  */
-  if (GET_CODE (in) == SUBREG
-      && (GET_MODE_SIZE (GET_MODE (in))
-	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in))))
-      && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0)
-    in = SUBREG_REG (in), out = tem;
+  strip_paradoxical_subreg (&in, &out);
 
   if (GET_CODE (in) == PLUS
       && (REG_P (XEXP (in, 0))
@@ -6453,6 +6488,8 @@
 
 	      if (regno >= 0
 		  && reg_last_reload_reg[regno] != 0
+		  && (GET_MODE_SIZE (GET_MODE (reg_last_reload_reg[regno]))
+		      >= GET_MODE_SIZE (mode) + byte)
 #ifdef CANNOT_CHANGE_MODE_CLASS
 		  /* Verify that the register it's in can be used in
 		     mode MODE.  */
@@ -6464,24 +6501,12 @@
 		{
 		  enum reg_class rclass = rld[r].rclass, last_class;
 		  rtx last_reg = reg_last_reload_reg[regno];
-		  enum machine_mode need_mode;
 
 		  i = REGNO (last_reg);
 		  i += subreg_regno_offset (i, GET_MODE (last_reg), byte, mode);
 		  last_class = REGNO_REG_CLASS (i);
 
-		  if (byte == 0)
-		    need_mode = mode;
-		  else
-		    need_mode
-		      = smallest_mode_for_size
-		        (GET_MODE_BITSIZE (mode) + byte * BITS_PER_UNIT,
-			 GET_MODE_CLASS (mode) == MODE_PARTIAL_INT
-			 ? MODE_INT : GET_MODE_CLASS (mode));
-
-		  if ((GET_MODE_SIZE (GET_MODE (last_reg))
-		       >= GET_MODE_SIZE (need_mode))
-		      && reg_reloaded_contents[i] == regno
+		  if (reg_reloaded_contents[i] == regno
 		      && TEST_HARD_REG_BIT (reg_reloaded_valid, i)
 		      && HARD_REGNO_MODE_OK (i, rld[r].mode)
 		      && (TEST_HARD_REG_BIT (reg_class_contents[(int) rclass], i)
@@ -7583,7 +7608,6 @@
 	      if (tertiary_icode != CODE_FOR_nothing)
 		{
 		  rtx third_reloadreg = rld[tertiary_reload].reg_rtx;
-		  rtx tem;
 
 		  /* Copy primary reload reg to secondary reload reg.
 		     (Note that these have been swapped above, then
@@ -7592,13 +7616,7 @@
 		  /* If REAL_OLD is a paradoxical SUBREG, remove it
 		     and try to put the opposite SUBREG on
 		     RELOADREG.  */
-		  if (GET_CODE (real_old) == SUBREG
-		      && (GET_MODE_SIZE (GET_MODE (real_old))
-			  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (real_old))))
-		      && 0 != (tem = gen_lowpart_common
-			       (GET_MODE (SUBREG_REG (real_old)),
-				reloadreg)))
-		    real_old = SUBREG_REG (real_old), reloadreg = tem;
+		  strip_paradoxical_subreg (&real_old, &reloadreg);
 
 		  gen_reload (reloadreg, second_reloadreg,
 			      rl->opnum, rl->when_needed);
@@ -8414,16 +8432,8 @@
 
   /* If IN is a paradoxical SUBREG, remove it and try to put the
      opposite SUBREG on OUT.  Likewise for a paradoxical SUBREG on OUT.  */
-  if (GET_CODE (in) == SUBREG
-      && (GET_MODE_SIZE (GET_MODE (in))
-	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (in))))
-      && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (in)), out)) != 0)
-    in = SUBREG_REG (in), out = tem;
-  else if (GET_CODE (out) == SUBREG
-	   && (GET_MODE_SIZE (GET_MODE (out))
-	       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (out))))
-	   && (tem = gen_lowpart_common (GET_MODE (SUBREG_REG (out)), in)) != 0)
-    out = SUBREG_REG (out), in = tem;
+  if (!strip_paradoxical_subreg (&in, &out))
+    strip_paradoxical_subreg (&out, &in);
 
   /* How to do this reload can get quite tricky.  Normally, we are being
      asked to reload a simple operand, such as a MEM, a constant, or a pseudo
--- a/src/gcc/reload.c
+++ b/src/gcc/reload.c
@@ -1017,6 +1017,7 @@
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (in)), inmode, rclass)
 #endif
+      && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (in))]
       && (CONSTANT_P (SUBREG_REG (in))
 	  || GET_CODE (SUBREG_REG (in)) == PLUS
 	  || strict_low
@@ -1123,6 +1124,7 @@
 #ifdef CANNOT_CHANGE_MODE_CLASS
       && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SUBREG_REG (out)), outmode, rclass)
 #endif
+      && contains_reg_of_mode[(int) rclass][(int) GET_MODE (SUBREG_REG (out))]
       && (CONSTANT_P (SUBREG_REG (out))
 	  || strict_low
 	  || (((REG_P (SUBREG_REG (out))
@@ -4721,7 +4723,8 @@
 	    simplify_gen_subreg (GET_MODE (x), reg_equiv_constant[regno],
 				 GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x));
 	  gcc_assert (tem);
-	  if (CONSTANT_P (tem) && !LEGITIMATE_CONSTANT_P (tem))
+	  if (CONSTANT_P (tem)
+	      && !targetm.legitimate_constant_p (GET_MODE (x), tem))
 	    {
 	      tem = force_const_mem (GET_MODE (x), tem);
 	      i = find_reloads_address (GET_MODE (tem), &tem, XEXP (tem, 0),
@@ -6049,7 +6052,7 @@
 			   enum reload_type type, int ind_levels)
 {
   if (CONSTANT_P (x)
-      && (! LEGITIMATE_CONSTANT_P (x)
+      && (!targetm.legitimate_constant_p (mode, x)
 	  || targetm.preferred_reload_class (x, rclass) == NO_REGS))
     {
       x = force_const_mem (mode, x);
@@ -6059,7 +6062,7 @@
 
   else if (GET_CODE (x) == PLUS
 	   && CONSTANT_P (XEXP (x, 1))
-	   && (! LEGITIMATE_CONSTANT_P (XEXP (x, 1))
+	   && (!targetm.legitimate_constant_p (GET_MODE (x), XEXP (x, 1))
 	       || targetm.preferred_reload_class (XEXP (x, 1), rclass)
 		   == NO_REGS))
     {
--- a/src/gcc/sched-deps.c
+++ b/src/gcc/sched-deps.c
@@ -450,7 +450,7 @@
 static void add_dependence_list_and_free (struct deps_desc *, rtx,
 					  rtx *, int, enum reg_note);
 static void delete_all_dependences (rtx);
-static void fixup_sched_groups (rtx);
+static void chain_to_prev_insn (rtx);
 
 static void flush_pending_lists (struct deps_desc *, rtx, int, int);
 static void sched_analyze_1 (struct deps_desc *, rtx, rtx);
@@ -1490,7 +1490,7 @@
    the previous nonnote insn.  */
 
 static void
-fixup_sched_groups (rtx insn)
+chain_to_prev_insn (rtx insn)
 {
   sd_iterator_def sd_it;
   dep_t dep;
@@ -1999,7 +1999,7 @@
   static struct reg_pressure_data *pressure_info;
   rtx link;
 
-  gcc_assert (sched_pressure_p);
+  gcc_assert (sched_pressure != SCHED_PRESSURE_NONE);
 
   if (! INSN_P (insn))
     return;
@@ -2030,8 +2030,9 @@
   len = sizeof (struct reg_pressure_data) * ira_reg_class_cover_size;
   pressure_info
     = INSN_REG_PRESSURE (insn) = (struct reg_pressure_data *) xmalloc (len);
-  INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size
-						  * sizeof (int), 1);
+  if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+    INSN_MAX_REG_PRESSURE (insn) = (int *) xcalloc (ira_reg_class_cover_size
+						    * sizeof (int), 1);
   for (i = 0; i < ira_reg_class_cover_size; i++)
     {
       cl = ira_reg_class_cover[i];
@@ -2775,7 +2776,7 @@
       || (NONJUMP_INSN_P (insn) && control_flow_insn_p (insn)))
     reg_pending_barrier = MOVE_BARRIER;
 
-  if (sched_pressure_p)
+  if (sched_pressure != SCHED_PRESSURE_NONE)
     {
       setup_insn_reg_uses (deps, insn);
       setup_insn_reg_pressure_info (insn);
@@ -3076,7 +3077,7 @@
 	       instructions that follow seem like they should be part
 	       of the call group.
 
-	       Also, if we did, fixup_sched_groups() would move the
+	       Also, if we did, chain_to_prev_insn would move the
 	       deps of the debug insn to the call insn, modifying
 	       non-debug post-dependency counts of the debug insn
 	       dependencies and otherwise messing with the scheduling
@@ -3222,6 +3223,37 @@
   return true;
 }
 
+/* Return true if INSN should be made dependent on the previous instruction
+   group, and if all INSN's dependencies should be moved to the first
+   instruction of that group.  */
+
+static bool
+chain_to_prev_insn_p (rtx insn)
+{
+  rtx prev, x;
+
+  /* INSN forms a group with the previous instruction.  */
+  if (SCHED_GROUP_P (insn))
+    return true;
+
+  /* If the previous instruction clobbers a register R and this one sets
+     part of R, the clobber was added specifically to help us track the
+     liveness of R.  There's no point scheduling the clobber and leaving
+     INSN behind, especially if we move the clobber to another block.  */
+  prev = prev_nonnote_nondebug_insn (insn);
+  if (prev
+      && INSN_P (prev)
+      && BLOCK_FOR_INSN (prev) == BLOCK_FOR_INSN (insn)
+      && GET_CODE (PATTERN (prev)) == CLOBBER)
+    {
+      x = XEXP (PATTERN (prev), 0);
+      if (set_of (x, insn))
+	return true;
+    }
+
+  return false;
+}
+
 /* Analyze INSN with DEPS as a context.  */
 void
 deps_analyze_insn (struct deps_desc *deps, rtx insn)
@@ -3358,8 +3390,9 @@
 
   /* Fixup the dependencies in the sched group.  */
   if ((NONJUMP_INSN_P (insn) || JUMP_P (insn))
-      && SCHED_GROUP_P (insn) && !sel_sched_p ())
-    fixup_sched_groups (insn);
+      && chain_to_prev_insn_p (insn)
+      && !sel_sched_p ())
+    chain_to_prev_insn (insn);
 }
 
 /* Initialize DEPS for the new block beginning with HEAD.  */
--- a/src/gcc/sched-int.h
+++ b/src/gcc/sched-int.h
@@ -651,7 +651,7 @@
 
 /* Do register pressure sensitive insn scheduling if the flag is set
    up.  */
-extern bool sched_pressure_p;
+extern enum sched_pressure_algorithm sched_pressure;
 
 /* Map regno -> its cover class.  The map defined only when
    SCHED_PRESSURE_P is true.  */
@@ -773,16 +773,16 @@
 
   short cost;
 
-  /* Set if there's DEF-USE dependence between some speculatively
-     moved load insn and this one.  */
-  unsigned int fed_by_spec_load : 1;
-  unsigned int is_load_insn : 1;
-
   /* '> 0' if priority is valid,
      '== 0' if priority was not yet computed,
      '< 0' if priority in invalid and should be recomputed.  */
   signed char priority_status;
 
+  /* Set if there's DEF-USE dependence between some speculatively
+     moved load insn and this one.  */
+  unsigned int fed_by_spec_load : 1;
+  unsigned int is_load_insn : 1;
+
   /* What speculations are necessary to apply to schedule the instruction.  */
   ds_t todo_spec;
 
@@ -817,6 +817,7 @@
   /* Info about how scheduling the insn changes cost of register
      pressure excess (between source and target).  */
   int reg_pressure_excess_cost_change;
+  int model_index;
 };
 
 typedef struct _haifa_insn_data haifa_insn_data_def;
@@ -839,6 +840,7 @@
 #define INSN_REG_PRESSURE_EXCESS_COST_CHANGE(INSN) \
   (HID (INSN)->reg_pressure_excess_cost_change)
 #define INSN_PRIORITY_STATUS(INSN) (HID (INSN)->priority_status)
+#define INSN_MODEL_INDEX(INSN) (HID (INSN)->model_index)
 
 typedef struct _haifa_deps_insn_data haifa_deps_insn_data_def;
 typedef haifa_deps_insn_data_def *haifa_deps_insn_data_t;
--- a/src/gcc/sched-rgn.c
+++ b/src/gcc/sched-rgn.c
@@ -2943,7 +2943,7 @@
 
   sched_extend_ready_list (rgn_n_insns);
 
-  if (sched_pressure_p)
+  if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
     {
       sched_init_region_reg_pressure_info ();
       for (bb = 0; bb < current_nr_blocks; bb++)
--- a/src/gcc/simplify-rtx.c
+++ b/src/gcc/simplify-rtx.c
@@ -1001,6 +1001,48 @@
 	  && GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF)
 	return XEXP (op, 0);
 
+      /* Extending a widening multiplication should be canonicalized to
+	 a wider widening multiplication.  */
+      if (GET_CODE (op) == MULT)
+	{
+	  rtx lhs = XEXP (op, 0);
+	  rtx rhs = XEXP (op, 1);
+	  enum rtx_code lcode = GET_CODE (lhs);
+	  enum rtx_code rcode = GET_CODE (rhs);
+
+	  /* Widening multiplies usually extend both operands, but sometimes
+	     they use a shift to extract a portion of a register.  */
+	  if ((lcode == SIGN_EXTEND
+	       || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+	      && (rcode == SIGN_EXTEND
+		  || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+	    {
+	      enum machine_mode lmode = GET_MODE (lhs);
+	      enum machine_mode rmode = GET_MODE (rhs);
+	      int bits;
+
+	      if (lcode == ASHIFTRT)
+		/* Number of bits not shifted off the end.  */
+		bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+	      else /* lcode == SIGN_EXTEND */
+		/* Size of inner mode.  */
+		bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+	      if (rcode == ASHIFTRT)
+		bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+	      else /* rcode == SIGN_EXTEND */
+		bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+	      /* We can only widen multiplies if the result is mathematiclly
+		 equivalent.  I.e. if overflow was impossible.  */
+	      if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+		return simplify_gen_binary
+			 (MULT, mode,
+			  simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode),
+			  simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode));
+	    }
+	}
+
       /* Check for a sign extension of a subreg of a promoted
 	 variable, where the promotion is sign-extended, and the
 	 target mode is the same as the variable's promotion.  */
@@ -1072,6 +1114,48 @@
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	return rtl_hooks.gen_lowpart_no_emit (mode, op);
 
+      /* Extending a widening multiplication should be canonicalized to
+	 a wider widening multiplication.  */
+      if (GET_CODE (op) == MULT)
+	{
+	  rtx lhs = XEXP (op, 0);
+	  rtx rhs = XEXP (op, 1);
+	  enum rtx_code lcode = GET_CODE (lhs);
+	  enum rtx_code rcode = GET_CODE (rhs);
+
+	  /* Widening multiplies usually extend both operands, but sometimes
+	     they use a shift to extract a portion of a register.  */
+	  if ((lcode == ZERO_EXTEND
+	       || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+	      && (rcode == ZERO_EXTEND
+		  || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+	    {
+	      enum machine_mode lmode = GET_MODE (lhs);
+	      enum machine_mode rmode = GET_MODE (rhs);
+	      int bits;
+
+	      if (lcode == LSHIFTRT)
+		/* Number of bits not shifted off the end.  */
+		bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+	      else /* lcode == ZERO_EXTEND */
+		/* Size of inner mode.  */
+		bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+	      if (rcode == LSHIFTRT)
+		bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+	      else /* rcode == ZERO_EXTEND */
+		bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+	      /* We can only widen multiplies if the result is mathematiclly
+		 equivalent.  I.e. if overflow was impossible.  */
+	      if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+		return simplify_gen_binary
+			 (MULT, mode,
+			  simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode),
+			  simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode));
+	    }
+	}
+
       /* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>).  */
       if (GET_CODE (op) == ZERO_EXTEND)
 	return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0),
@@ -2507,6 +2591,46 @@
 							XEXP (op0, 1), mode),
 				    op1);
 
+      /* Given (xor (and A B) C), using P^Q == (~P&Q) | (~Q&P),
+	 we can transform like this:
+            (A&B)^C == ~(A&B)&C | ~C&(A&B)
+                    == (~A|~B)&C | ~C&(A&B)    * DeMorgan's Law
+                    == ~A&C | ~B&C | A&(~C&B)  * Distribute and re-order
+	 Attempt a few simplifications when B and C are both constants.  */
+      if (GET_CODE (op0) == AND
+	  && CONST_INT_P (op1)
+	  && CONST_INT_P (XEXP (op0, 1)))
+	{
+	  rtx a = XEXP (op0, 0);
+	  rtx b = XEXP (op0, 1);
+	  rtx c = op1;
+	  HOST_WIDE_INT bval = INTVAL (b);
+	  HOST_WIDE_INT cval = INTVAL (c);
+
+	  rtx na_c
+	    = simplify_binary_operation (AND, mode,
+					 simplify_gen_unary (NOT, mode, a, mode),
+					 c);
+	  if ((~cval & bval) == 0)
+	    {
+	      /* Try to simplify ~A&C | ~B&C.  */
+	      if (na_c != NULL_RTX)
+		return simplify_gen_binary (IOR, mode, na_c,
+					    GEN_INT (~bval & cval));
+	    }
+	  else
+	    {
+	      /* If ~A&C is zero, simplify A&(~C&B) | ~B&C.  */
+	      if (na_c == const0_rtx)
+		{
+		  rtx a_nc_b = simplify_gen_binary (AND, mode, a,
+						    GEN_INT (~cval & bval));
+		  return simplify_gen_binary (IOR, mode, a_nc_b,
+					      GEN_INT (~bval & cval));
+		}
+	    }
+	}
+
       /* (xor (comparison foo bar) (const_int 1)) can become the reversed
 	 comparison if STORE_FLAG_VALUE is 1.  */
       if (STORE_FLAG_VALUE == 1
@@ -5454,6 +5578,7 @@
   /* Optimize SUBREG truncations of zero and sign extended values.  */
   if ((GET_CODE (op) == ZERO_EXTEND
        || GET_CODE (op) == SIGN_EXTEND)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode))
     {
       unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
@@ -5492,6 +5617,7 @@
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       /* Ensure that OUTERMODE is at least twice as wide as the INNERMODE
 	 to avoid the possibility that an outer LSHIFTRT shifts by more
 	 than the sign extension's sign_bit_copies and introduces zeros
@@ -5511,6 +5637,7 @@
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
       && CONST_INT_P (XEXP (op, 1))
       && GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5525,6 +5652,7 @@
      the outer subreg is effectively a truncation to the original mode.  */
   if (GET_CODE (op) == ASHIFT
       && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_BITSIZE (outermode) < GET_MODE_BITSIZE (innermode)
       && CONST_INT_P (XEXP (op, 1))
       && (GET_CODE (XEXP (op, 0)) == ZERO_EXTEND
@@ -5538,7 +5666,7 @@
   /* Recognize a word extraction from a multi-word subreg.  */
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
-      && SCALAR_INT_MODE_P (outermode)
+      && SCALAR_INT_MODE_P (innermode)
       && GET_MODE_BITSIZE (outermode) >= BITS_PER_WORD
       && GET_MODE_BITSIZE (innermode) >= (2 * GET_MODE_BITSIZE (outermode))
       && CONST_INT_P (XEXP (op, 1))
@@ -5560,6 +5688,7 @@
 
   if ((GET_CODE (op) == LSHIFTRT
        || GET_CODE (op) == ASHIFTRT)
+      && SCALAR_INT_MODE_P (innermode)
       && MEM_P (XEXP (op, 0))
       && CONST_INT_P (XEXP (op, 1))
       && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (GET_MODE (op))
--- a/src/gcc/stmt.c
+++ b/src/gcc/stmt.c
@@ -1683,119 +1683,21 @@
     expand_value_return (result_rtl);
 
   /* If the result is an aggregate that is being returned in one (or more)
-     registers, load the registers here.  The compiler currently can't handle
-     copying a BLKmode value into registers.  We could put this code in a
-     more general area (for use by everyone instead of just function
-     call/return), but until this feature is generally usable it is kept here
-     (and in expand_call).  */
+     registers, load the registers here.  */
 
   else if (retval_rhs != 0
 	   && TYPE_MODE (TREE_TYPE (retval_rhs)) == BLKmode
 	   && REG_P (result_rtl))
     {
-      int i;
-      unsigned HOST_WIDE_INT bitpos, xbitpos;
-      unsigned HOST_WIDE_INT padding_correction = 0;
-      unsigned HOST_WIDE_INT bytes
-	= int_size_in_bytes (TREE_TYPE (retval_rhs));
-      int n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-      unsigned int bitsize
-	= MIN (TYPE_ALIGN (TREE_TYPE (retval_rhs)), BITS_PER_WORD);
-      rtx *result_pseudos = XALLOCAVEC (rtx, n_regs);
-      rtx result_reg, src = NULL_RTX, dst = NULL_RTX;
-      rtx result_val = expand_normal (retval_rhs);
-      enum machine_mode tmpmode, result_reg_mode;
-
-      if (bytes == 0)
-	{
-	  expand_null_return ();
-	  return;
-	}
-
-      /* If the structure doesn't take up a whole number of words, see
-	 whether the register value should be padded on the left or on
-	 the right.  Set PADDING_CORRECTION to the number of padding
-	 bits needed on the left side.
-
-	 In most ABIs, the structure will be returned at the least end of
-	 the register, which translates to right padding on little-endian
-	 targets and left padding on big-endian targets.  The opposite
-	 holds if the structure is returned at the most significant
-	 end of the register.  */
-      if (bytes % UNITS_PER_WORD != 0
-	  && (targetm.calls.return_in_msb (TREE_TYPE (retval_rhs))
-	      ? !BYTES_BIG_ENDIAN
-	      : BYTES_BIG_ENDIAN))
-	padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
-					       * BITS_PER_UNIT));
-
-      /* Copy the structure BITSIZE bits at a time.  */
-      for (bitpos = 0, xbitpos = padding_correction;
-	   bitpos < bytes * BITS_PER_UNIT;
-	   bitpos += bitsize, xbitpos += bitsize)
-	{
-	  /* We need a new destination pseudo each time xbitpos is
-	     on a word boundary and when xbitpos == padding_correction
-	     (the first time through).  */
-	  if (xbitpos % BITS_PER_WORD == 0
-	      || xbitpos == padding_correction)
-	    {
-	      /* Generate an appropriate register.  */
-	      dst = gen_reg_rtx (word_mode);
-	      result_pseudos[xbitpos / BITS_PER_WORD] = dst;
-
-	      /* Clear the destination before we move anything into it.  */
-	      emit_move_insn (dst, CONST0_RTX (GET_MODE (dst)));
-	    }
-
-	  /* We need a new source operand each time bitpos is on a word
-	     boundary.  */
-	  if (bitpos % BITS_PER_WORD == 0)
-	    src = operand_subword_force (result_val,
-					 bitpos / BITS_PER_WORD,
-					 BLKmode);
-
-	  /* Use bitpos for the source extraction (left justified) and
-	     xbitpos for the destination store (right justified).  */
-	  store_bit_field (dst, bitsize, xbitpos % BITS_PER_WORD, word_mode,
-			   extract_bit_field (src, bitsize,
-					      bitpos % BITS_PER_WORD, 1, false,
-					      NULL_RTX, word_mode, word_mode));
-	}
-
-      tmpmode = GET_MODE (result_rtl);
-      if (tmpmode == BLKmode)
+      val = copy_blkmode_to_reg (GET_MODE (result_rtl), retval_rhs);
+      if (val)
 	{
-	  /* Find the smallest integer mode large enough to hold the
-	     entire structure and use that mode instead of BLKmode
-	     on the USE insn for the return register.  */
-	  for (tmpmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
-	       tmpmode != VOIDmode;
-	       tmpmode = GET_MODE_WIDER_MODE (tmpmode))
-	    /* Have we found a large enough mode?  */
-	    if (GET_MODE_SIZE (tmpmode) >= bytes)
-	      break;
-
-	  /* A suitable mode should have been found.  */
-	  gcc_assert (tmpmode != VOIDmode);
-
-	  PUT_MODE (result_rtl, tmpmode);
+	  /* Use the mode of the result value on the return register.  */
+	  PUT_MODE (result_rtl, GET_MODE (val));
+	  expand_value_return (val);
 	}
-
-      if (GET_MODE_SIZE (tmpmode) < GET_MODE_SIZE (word_mode))
-	result_reg_mode = word_mode;
       else
-	result_reg_mode = tmpmode;
-      result_reg = gen_reg_rtx (result_reg_mode);
-
-      for (i = 0; i < n_regs; i++)
-	emit_move_insn (operand_subword (result_reg, i, 0, result_reg_mode),
-			result_pseudos[i]);
-
-      if (tmpmode != result_reg_mode)
-	result_reg = gen_lowpart (tmpmode, result_reg);
-
-      expand_value_return (result_reg);
+	expand_null_return ();
     }
   else if (retval_rhs != 0
 	   && !VOID_TYPE_P (TREE_TYPE (retval_rhs))
--- a/src/gcc/stor-layout.c
+++ b/src/gcc/stor-layout.c
@@ -546,6 +546,34 @@
   return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT));
 }
 
+/* Return the natural mode of an array, given that it is SIZE bytes in
+   total and has elements of type ELEM_TYPE.  */
+
+static enum machine_mode
+mode_for_array (tree elem_type, tree size)
+{
+  tree elem_size;
+  unsigned HOST_WIDE_INT int_size, int_elem_size;
+  bool limit_p;
+
+  /* One-element arrays get the component type's mode.  */
+  elem_size = TYPE_SIZE (elem_type);
+  if (simple_cst_equal (size, elem_size))
+    return TYPE_MODE (elem_type);
+
+  limit_p = true;
+  if (host_integerp (size, 1) && host_integerp (elem_size, 1))
+    {
+      int_size = tree_low_cst (size, 1);
+      int_elem_size = tree_low_cst (elem_size, 1);
+      if (int_elem_size > 0
+	  && int_size % int_elem_size == 0
+	  && targetm.array_mode_supported_p (TYPE_MODE (elem_type),
+					     int_size / int_elem_size))
+	limit_p = false;
+    }
+  return mode_for_size_tree (size, MODE_INT, limit_p);
+}
 
 /* Subroutine of layout_decl: Force alignment required for the data type.
    But if the decl itself wants greater alignment, don't override that.  */
@@ -2048,14 +2076,8 @@
 	    && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
 		|| TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
 	  {
-	    /* One-element arrays get the component type's mode.  */
-	    if (simple_cst_equal (TYPE_SIZE (type),
-				  TYPE_SIZE (TREE_TYPE (type))))
-	      SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type)));
-	    else
-	      SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type),
-						       MODE_INT, 1));
-
+	    SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
+						 TYPE_SIZE (type)));
 	    if (TYPE_MODE (type) != BLKmode
 		&& STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
 		&& TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
--- a/src/gcc/target.def
+++ b/src/gcc/target.def
@@ -1344,6 +1344,13 @@
  unsigned, (unsigned nunroll, struct loop *loop),
  NULL)
 
+/* True if X is a legitimate MODE-mode immediate operand.  */
+DEFHOOK
+(legitimate_constant_p,
+ "",
+ bool, (enum machine_mode mode, rtx x),
+ default_legitimate_constant_p)
+
 /* True if the constant X cannot be placed in the constant pool.  */
 DEFHOOK
 (cannot_force_const_mem,
@@ -1621,6 +1628,38 @@
  HOST_WIDE_INT, (const_tree type),
  default_vector_alignment)
 
+/* True if we should try to use a scalar mode to represent an array,
+   overriding the usual MAX_FIXED_MODE limit.  */
+DEFHOOK
+(array_mode_supported_p,
+ "Return true if GCC should try to use a scalar mode to store an array\n\
+of @var{nelems} elements, given that each element has mode @var{mode}.\n\
+Returning true here overrides the usual @code{MAX_FIXED_MODE} limit\n\
+and allows GCC to use any defined integer mode.\n\
+\n\
+One use of this hook is to support vector load and store operations\n\
+that operate on several homogeneous vectors.  For example, ARM NEON\n\
+has operations like:\n\
+\n\
+@smallexample\n\
+int8x8x3_t vld3_s8 (const int8_t *)\n\
+@end smallexample\n\
+\n\
+where the return type is defined as:\n\
+\n\
+@smallexample\n\
+typedef struct int8x8x3_t\n\
+@{\n\
+  int8x8_t val[3];\n\
+@} int8x8x3_t;\n\
+@end smallexample\n\
+\n\
+If this hook allows @code{val} to have a scalar mode, then\n\
+@code{int8x8x3_t} can have the same mode.  GCC can then store\n\
+@code{int8x8x3_t}s in registers rather than forcing them onto the stack.",
+ bool, (enum machine_mode mode, unsigned HOST_WIDE_INT nelems),
+ hook_bool_mode_uhwi_false)
+
 /* Compute cost of moving data from a register of class FROM to one of
    TO, using MODE.  */
 DEFHOOK
--- a/src/gcc/targhooks.c
+++ b/src/gcc/targhooks.c
@@ -1527,4 +1527,15 @@
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
+bool
+default_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+			       rtx x ATTRIBUTE_UNUSED)
+{
+#ifdef LEGITIMATE_CONSTANT_P
+  return LEGITIMATE_CONSTANT_P (x);
+#else
+  return true;
+#endif
+}
+
 #include "gt-targhooks.h"
--- a/src/gcc/targhooks.h
+++ b/src/gcc/targhooks.h
@@ -185,3 +185,4 @@
 
 extern void *default_get_pch_validity (size_t *);
 extern const char *default_pch_valid_p (const void *, size_t);
+extern bool default_legitimate_constant_p (enum machine_mode, rtx);
--- a/src/gcc/testsuite/gcc.c-torture/compile/20110401-1.c
+++ b/src/gcc/testsuite/gcc.c-torture/compile/20110401-1.c
@@ -0,0 +1,22 @@
+void asn1_length_der (unsigned long int len, unsigned char *ans, int *ans_len)
+{
+    int k;
+    unsigned char temp[4];
+    if (len < 128) {
+	if (ans != ((void *) 0))
+	    ans[0] = (unsigned char) len;
+	*ans_len = 1;
+    } else {
+	k = 0;
+	while (len) {
+	    temp[k++] = len & 0xFF;
+	    len = len >> 8;
+	}
+	*ans_len = k + 1;
+	if (ans != ((void *) 0)) {
+	    ans[0] = ((unsigned char) k & 0x7F) + 128;
+	    while (k--)
+		ans[*ans_len - 1 - k] = temp[k];
+	}
+    }
+}
--- a/src/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c
+++ b/src/gcc/testsuite/gcc.dg/di-longlong64-sync-1.c
@@ -0,0 +1,164 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sync_longlong } */
+/* { dg-options "-std=gnu99" } */
+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+
+
+/* Test basic functionality of the intrinsics.  The operations should
+   not be optimized away if no one checks the return values.  */
+
+/* Based on ia64-sync-[12].c, but 1) long on ARM is 32 bit so use long long
+   (an explicit 64bit type maybe a better bet) and 2) Use values that cross
+   the 32bit boundary and cause carries since the actual maths are done as
+   pairs of 32 bit instructions.  */
+
+/* Note: This file is #included by some of the ARM tests.  */
+
+__extension__ typedef __SIZE_TYPE__ size_t;
+
+extern void abort (void);
+extern void *memcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+
+/* Temporary space where the work actually gets done.  */
+static long long AL[24];
+/* Values copied into AL before we start.  */
+static long long init_di[24] = { 0x100000002ll, 0x200000003ll, 0, 1,
+
+				 0x100000002ll, 0x100000002ll,
+				 0x100000002ll, 0x100000002ll,
+
+				 0, 0x1000e0de0000ll,
+				 42 , 0xc001c0de0000ll,
+
+				 -1ll, 0, 0xff00ff0000ll, -1ll,
+
+				 0, 0x1000e0de0000ll,
+				 42 , 0xc001c0de0000ll,
+
+				 -1ll, 0, 0xff00ff0000ll, -1ll};
+/* This is what should be in AL at the end.  */
+static long long test_di[24] = { 0x1234567890ll, 0x1234567890ll, 1, 0,
+
+				 0x100000002ll, 0x100000002ll,
+				 0x100000002ll, 0x100000002ll,
+
+				 1, 0xc001c0de0000ll,
+				 20, 0x1000e0de0000ll,
+
+				 0x300000007ll , 0x500000009ll,
+				 0xf100ff0001ll, ~0xa00000007ll,
+
+				 1, 0xc001c0de0000ll,
+				 20, 0x1000e0de0000ll,
+
+				 0x300000007ll , 0x500000009ll,
+				 0xf100ff0001ll, ~0xa00000007ll };
+
+/* First check they work in terms of what they do to memory.  */
+static void
+do_noret_di (void)
+{
+  __sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll);
+  __sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll);
+  __sync_lock_test_and_set (AL+2, 1);
+  __sync_lock_release (AL+3);
+
+  /* The following tests should not change the value since the
+     original does NOT match.  */
+  __sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll);
+  __sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll);
+  __sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll);
+  __sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll);
+
+  __sync_fetch_and_add (AL+8, 1);
+  __sync_fetch_and_add (AL+9, 0xb000e0000000ll); /* + to both halves & carry.  */
+  __sync_fetch_and_sub (AL+10, 22);
+  __sync_fetch_and_sub (AL+11, 0xb000e0000000ll);
+
+  __sync_fetch_and_and (AL+12, 0x300000007ll);
+  __sync_fetch_and_or (AL+13, 0x500000009ll);
+  __sync_fetch_and_xor (AL+14, 0xe00000001ll);
+  __sync_fetch_and_nand (AL+15, 0xa00000007ll);
+
+  /* These should be the same as the fetch_and_* cases except for
+     return value.  */
+  __sync_add_and_fetch (AL+16, 1);
+  /* add to both halves & carry.  */
+  __sync_add_and_fetch (AL+17, 0xb000e0000000ll);
+  __sync_sub_and_fetch (AL+18, 22);
+  __sync_sub_and_fetch (AL+19, 0xb000e0000000ll);
+
+  __sync_and_and_fetch (AL+20, 0x300000007ll);
+  __sync_or_and_fetch (AL+21, 0x500000009ll);
+  __sync_xor_and_fetch (AL+22, 0xe00000001ll);
+  __sync_nand_and_fetch (AL+23, 0xa00000007ll);
+}
+
+/* Now check return values.  */
+static void
+do_ret_di (void)
+{
+  if (__sync_val_compare_and_swap (AL+0, 0x100000002ll, 0x1234567890ll) !=
+	0x100000002ll) abort ();
+  if (__sync_bool_compare_and_swap (AL+1, 0x200000003ll, 0x1234567890ll) !=
+	1) abort ();
+  if (__sync_lock_test_and_set (AL+2, 1) != 0) abort ();
+  __sync_lock_release (AL+3); /* no return value, but keep to match results.  */
+
+  /* The following tests should not change the value since the
+     original does NOT match.  */
+  if (__sync_val_compare_and_swap (AL+4, 0x000000002ll, 0x1234567890ll) !=
+	0x100000002ll) abort ();
+  if (__sync_val_compare_and_swap (AL+5, 0x100000000ll, 0x1234567890ll) !=
+	0x100000002ll) abort ();
+  if (__sync_bool_compare_and_swap (AL+6, 0x000000002ll, 0x1234567890ll) !=
+	0) abort ();
+  if (__sync_bool_compare_and_swap (AL+7, 0x100000000ll, 0x1234567890ll) !=
+	0) abort ();
+
+  if (__sync_fetch_and_add (AL+8, 1) != 0) abort ();
+  if (__sync_fetch_and_add (AL+9, 0xb000e0000000ll) != 0x1000e0de0000ll) abort ();
+  if (__sync_fetch_and_sub (AL+10, 22) != 42) abort ();
+  if (__sync_fetch_and_sub (AL+11, 0xb000e0000000ll) != 0xc001c0de0000ll)
+	abort ();
+
+  if (__sync_fetch_and_and (AL+12, 0x300000007ll) != -1ll) abort ();
+  if (__sync_fetch_and_or (AL+13, 0x500000009ll) != 0) abort ();
+  if (__sync_fetch_and_xor (AL+14, 0xe00000001ll) != 0xff00ff0000ll) abort ();
+  if (__sync_fetch_and_nand (AL+15, 0xa00000007ll) != -1ll) abort ();
+
+  /* These should be the same as the fetch_and_* cases except for
+     return value.  */
+  if (__sync_add_and_fetch (AL+16, 1) != 1) abort ();
+  if (__sync_add_and_fetch (AL+17, 0xb000e0000000ll) != 0xc001c0de0000ll)
+	abort ();
+  if (__sync_sub_and_fetch (AL+18, 22) != 20) abort ();
+  if (__sync_sub_and_fetch (AL+19, 0xb000e0000000ll) != 0x1000e0de0000ll)
+	abort ();
+
+  if (__sync_and_and_fetch (AL+20, 0x300000007ll) != 0x300000007ll) abort ();
+  if (__sync_or_and_fetch (AL+21, 0x500000009ll) != 0x500000009ll) abort ();
+  if (__sync_xor_and_fetch (AL+22, 0xe00000001ll) != 0xf100ff0001ll) abort ();
+  if (__sync_nand_and_fetch (AL+23, 0xa00000007ll) != ~0xa00000007ll) abort ();
+}
+
+int main ()
+{
+  memcpy (AL, init_di, sizeof (init_di));
+
+  do_noret_di ();
+
+  if (memcmp (AL, test_di, sizeof (test_di)))
+    abort ();
+
+  memcpy (AL, init_di, sizeof (init_di));
+
+  do_ret_di ();
+
+  if (memcmp (AL, test_di, sizeof (test_di)))
+    abort ();
+
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.dg/di-sync-multithread.c
+++ b/src/gcc/testsuite/gcc.dg/di-sync-multithread.c
@@ -0,0 +1,205 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sync_longlong } */
+/* { dg-require-effective-target pthread_h } */
+/* { dg-require-effective-target pthread } */
+/* { dg-options "-pthread -std=gnu99" } */
+
+/* test of long long atomic ops performed in parallel in 3 pthreads
+   david.gilbert@linaro.org */
+
+#include <pthread.h>
+#include <unistd.h>
+
+/*#define DEBUGIT 1 */
+
+#ifdef DEBUGIT
+#include <stdio.h>
+
+#define DOABORT(x,...) {\
+	 fprintf (stderr, x, __VA_ARGS__); fflush (stderr); abort ();\
+	 }
+
+#else
+
+#define DOABORT(x,...) abort ();
+
+#endif
+
+/* Passed to each thread to describe which bits it is going to work on.  */
+struct threadwork {
+  unsigned long long count; /* incremented each time the worker loops.  */
+  unsigned int thread;    /* ID */
+  unsigned int addlsb;    /* 8 bit */
+  unsigned int logic1lsb; /* 5 bit */
+  unsigned int logic2lsb; /* 8 bit */
+};
+
+/* The shared word where all the atomic work is done.  */
+static volatile long long workspace;
+
+/* A shared word to tell the workers to quit when non-0.  */
+static long long doquit;
+
+extern void abort (void);
+
+/* Note this test doesn't test the return values much.  */
+void*
+worker (void* data)
+{
+  struct threadwork *tw = (struct threadwork*)data;
+  long long add1bit = 1ll << tw->addlsb;
+  long long logic1bit = 1ll << tw->logic1lsb;
+  long long logic2bit = 1ll << tw->logic2lsb;
+
+  /* Clear the bits we use.  */
+  __sync_and_and_fetch (&workspace, ~(0xffll * add1bit));
+  __sync_fetch_and_and (&workspace, ~(0x1fll * logic1bit));
+  __sync_fetch_and_and (&workspace, ~(0xffll * logic2bit));
+
+  do
+    {
+      long long tmp1, tmp2, tmp3;
+      /* OK, lets try and do some stuff to the workspace - by the end
+         of the main loop our area should be the same as it is now - i.e. 0.  */
+
+      /* Push the arithmetic section upto 128 - one of the threads will
+         case this to carry accross the 32bit boundary.  */
+      for (tmp2 = 0; tmp2 < 64; tmp2++)
+	{
+	  /* Add 2 using the two different adds.  */
+	  tmp1 = __sync_add_and_fetch (&workspace, add1bit);
+	  tmp3 = __sync_fetch_and_add (&workspace, add1bit);
+
+	  /* The value should be the intermediate add value in both cases.  */
+	  if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
+	    DOABORT ("Mismatch of add intermediates on thread %d "
+			"workspace=0x%llx tmp1=0x%llx "
+			"tmp2=0x%llx tmp3=0x%llx\n",
+			 tw->thread, workspace, tmp1, tmp2, tmp3);
+	}
+
+      /* Set the logic bits.  */
+      tmp2=__sync_or_and_fetch (&workspace,
+			  0x1fll * logic1bit | 0xffll * logic2bit);
+
+      /* Check the logic bits are set and the arithmetic value is correct.  */
+      if ((tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit
+			| 0xffll * add1bit))
+	  != (0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit))
+	DOABORT ("Midloop check failed on thread %d "
+			"workspace=0x%llx tmp2=0x%llx "
+			"masktmp2=0x%llx expected=0x%llx\n",
+		tw->thread, workspace, tmp2,
+		tmp2 & (0x1fll * logic1bit | 0xffll * logic2bit |
+			 0xffll * add1bit),
+		(0x1fll * logic1bit | 0xffll * logic2bit | 0x80ll * add1bit));
+
+      /* Pull the arithmetic set back down to 0 - again this should cause a
+	 carry across the 32bit boundary in one thread.  */
+
+      for (tmp2 = 0; tmp2 < 64; tmp2++)
+	{
+	  /* Subtract 2 using the two different subs.  */
+	  tmp1=__sync_sub_and_fetch (&workspace, add1bit);
+	  tmp3=__sync_fetch_and_sub (&workspace, add1bit);
+
+	  /* The value should be the intermediate sub value in both cases.  */
+	  if ((tmp1 & (add1bit * 0xff)) != (tmp3 & (add1bit * 0xff)))
+	    DOABORT ("Mismatch of sub intermediates on thread %d "
+			"workspace=0x%llx tmp1=0x%llx "
+			"tmp2=0x%llx tmp3=0x%llx\n",
+			tw->thread, workspace, tmp1, tmp2, tmp3);
+	}
+
+
+      /* Clear the logic bits.  */
+      __sync_fetch_and_xor (&workspace, 0x1fll * logic1bit);
+      tmp3=__sync_and_and_fetch (&workspace, ~(0xffll * logic2bit));
+
+      /* The logic bits and the arithmetic bits should be zero again.  */
+      if (tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit))
+	DOABORT ("End of worker loop; bits none 0 on thread %d "
+			"workspace=0x%llx tmp3=0x%llx "
+			"mask=0x%llx maskedtmp3=0x%llx\n",
+		tw->thread, workspace, tmp3, (0x1fll * logic1bit |
+			0xffll * logic2bit | 0xffll * add1bit),
+		tmp3 & (0x1fll * logic1bit | 0xffll * logic2bit | 0xffll * add1bit));
+
+      __sync_add_and_fetch (&tw->count, 1);
+    }
+  while (!__sync_bool_compare_and_swap (&doquit, 1, 1));
+
+  pthread_exit (0);
+}
+
+int
+main ()
+{
+  /* We have 3 threads doing three sets of operations, an 8 bit
+     arithmetic field, a 5 bit logic field and an 8 bit logic
+     field (just to pack them all in).
+
+  6      5       4       4       3       2       1
+  3      6       8       0       2       4       6       8       0
+  |...,...|...,...|...,...|...,...|...,...|...,...|...,...|...,...
+  - T0   --  T1  -- T2   --T2 --  T0  -*- T2-- T1-- T1   -***- T0-
+   logic2  logic2  arith   log2  arith  log1 log1  arith     log1
+
+  */
+  unsigned int t;
+  long long tmp;
+  int err;
+
+  struct threadwork tw[3]={
+    { 0ll, 0, 27, 0, 56 },
+    { 0ll, 1,  8,16, 48 },
+    { 0ll, 2, 40,21, 35 }
+  };
+
+  pthread_t threads[3];
+
+  __sync_lock_release (&doquit);
+
+  /* Get the work space into a known value - All 1's.  */
+  __sync_lock_release (&workspace); /* Now all 0.  */
+  tmp = __sync_val_compare_and_swap (&workspace, 0, -1ll);
+  if (tmp!=0)
+    DOABORT ("Initial __sync_val_compare_and_swap wasn't 0 workspace=0x%llx "
+		"tmp=0x%llx\n", workspace,tmp);
+
+  for (t = 0; t < 3; t++)
+  {
+    err=pthread_create (&threads[t], NULL , worker, &tw[t]);
+    if (err) DOABORT ("pthread_create failed on thread %d with error %d\n",
+	t, err);
+  };
+
+  sleep (5);
+
+  /* Stop please.  */
+  __sync_lock_test_and_set (&doquit, 1ll);
+
+  for (t = 0; t < 3; t++)
+    {
+      err=pthread_join (threads[t], NULL);
+      if (err)
+	DOABORT ("pthread_join failed on thread %d with error %d\n", t, err);
+    };
+
+  __sync_synchronize ();
+
+  /* OK, so all the workers have finished -
+     the workers should have zero'd their workspace, the unused areas
+     should still be 1.  */
+  if (!__sync_bool_compare_and_swap (&workspace, 0x040000e0ll, 0))
+    DOABORT ("End of run workspace mismatch, got %llx\n", workspace);
+
+  /* All the workers should have done some work.  */
+  for (t = 0; t < 3; t++)
+    {
+      if (tw[t].count == 0) DOABORT ("Worker %d gave 0 count\n", t);
+    };
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.dg/pr50193-1.c
+++ b/src/gcc/testsuite/gcc.dg/pr50193-1.c
@@ -0,0 +1,10 @@
+/* PR 50193: ARM: ICE on a | (b << negative-constant) */
+/* Ensure that the compiler doesn't ICE.  */
+
+/* { dg-options "-O2" } */
+
+int
+foo(int a, int b)
+{
+  return a | (b << -3); /* { dg-warning "left shift count is negative" } */
+}
--- a/src/gcc/testsuite/gcc.dg/pr50717-1.c
+++ b/src/gcc/testsuite/gcc.dg/pr50717-1.c
@@ -0,0 +1,26 @@
+/* PR tree-optimization/50717  */
+/* Ensure that widening multiply-and-accumulate is not used where integer
+   type promotion or users' casts should prevent it.  */
+
+/* { dg-options "-O2 -fdump-tree-widening_mul" } */
+
+long long
+f (unsigned int a, char b, long long c)
+{
+  return (a * b) + c;
+}
+
+int
+g (short a, short b, int c)
+{
+  return (short)(a * b) + c;
+}
+
+int
+h (char a, char b, int c)
+{
+  return (char)(a * b) + c;
+}
+
+/* { dg-final { scan-tree-dump-times "WIDEN_MULT_PLUS_EXPR" 0 "widening_mul" } } */
+/* { dg-final { cleanup-tree-dump "widening_mul" } } */
--- a/src/gcc/testsuite/gcc.dg/sms-10.c
+++ b/src/gcc/testsuite/gcc.dg/sms-10.c
@@ -0,0 +1,118 @@
+ /* { dg-do run } */
+ /* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
+
+
+typedef __SIZE_TYPE__ size_t;
+extern void *malloc (size_t);
+extern void free (void *);
+extern void abort (void);
+
+struct regstat_n_sets_and_refs_t
+{
+  int sets;
+  int refs;
+};
+
+struct regstat_n_sets_and_refs_t *regstat_n_sets_and_refs;
+
+struct df_reg_info
+{
+  unsigned int n_refs;
+};
+
+struct df_d
+{
+  struct df_reg_info **def_regs;
+  struct df_reg_info **use_regs;
+};
+struct df_d *df;
+
+static inline int
+REG_N_SETS (int regno)
+{
+  return regstat_n_sets_and_refs[regno].sets;
+}
+
+__attribute__ ((noinline))
+     int max_reg_num (void)
+{
+  return 100;
+}
+
+__attribute__ ((noinline))
+     void regstat_init_n_sets_and_refs (void)
+{
+  unsigned int i;
+  unsigned int max_regno = max_reg_num ();
+
+  for (i = 0; i < max_regno; i++)
+    {
+      (regstat_n_sets_and_refs[i].sets = (df->def_regs[(i)]->n_refs));
+      (regstat_n_sets_and_refs[i].refs =
+       (df->use_regs[(i)]->n_refs) + REG_N_SETS (i));
+    }
+}
+
+int a_sets[100] =
+  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+  40, 41, 42,
+  43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+  62, 63, 64,
+  65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
+  84, 85, 86,
+  87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
+};
+
+int a_refs[100] =
+  { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
+  40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
+  78, 80, 82,
+  84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116,
+  118, 120,
+  122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150,
+  152, 154, 156,
+  158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
+  188, 190, 192,
+  194, 196, 198
+};
+
+int
+main ()
+{
+  struct df_reg_info *b[100], *c[100];
+  struct df_d df1;
+  size_t s = sizeof (struct df_reg_info);
+  struct regstat_n_sets_and_refs_t a[100];
+
+  df = &df1;
+  regstat_n_sets_and_refs = a;
+  int i;
+
+  for (i = 0; i < 100; i++)
+    {
+      b[i] = (struct df_reg_info *) malloc (s);
+      b[i]->n_refs = i;
+      c[i] = (struct df_reg_info *) malloc (s);
+      c[i]->n_refs = i;
+    }
+
+  df1.def_regs = b;
+  df1.use_regs = c;
+  regstat_init_n_sets_and_refs ();
+
+  for (i = 0; i < 100; i++)
+    if ((a[i].sets != a_sets[i]) || (a[i].refs != a_refs[i]))
+      abort ();
+
+  for (i = 0; i < 100; i++)
+    {
+      free (b[i]);
+      free (c[i]);
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-rtl-dump-times "SMS succeeded" 1 "sms" { target powerpc*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "sms" } } */
--- a/src/gcc/testsuite/gcc.dg/sms-11.c
+++ b/src/gcc/testsuite/gcc.dg/sms-11.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -fdump-rtl-sms" } */
+
+extern void abort (void);
+
+float out[4][4] = { 6, 6, 7, 5, 6, 7, 5, 5, 6, 4, 4, 4, 6, 2, 3, 4 };
+
+void
+invert (void)
+{
+  int i, j, k = 0, swap;
+  float tmp[4][4] = { 5, 6, 7, 5, 6, 7, 5, 5, 4, 4, 4, 4, 3, 2, 3, 4 };
+
+  for (i = 0; i < 4; i++)
+    {
+      for (j = i + 1; j < 4; j++)
+	if (tmp[j][i] > tmp[i][i])
+	  swap = j;
+
+      if (swap != i)
+	tmp[i][k] = tmp[swap][k];
+    }
+
+  for (i = 0; i < 4; i++)
+    for (j = 0; j < 4; j++)
+      if (tmp[i][j] != out[i][j])
+	abort ();
+}
+
+int
+main ()
+{
+  invert ();
+  return 0;
+}
+
+/* { dg-final { cleanup-rtl-dump "sms" } } */
--- a/src/gcc/testsuite/gcc.dg/sms-9.c
+++ b/src/gcc/testsuite/gcc.dg/sms-9.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 -fmodulo-sched-allow-regmoves" } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+struct df_ref_info
+{
+  unsigned int *begin;
+  unsigned int *count;
+};
+
+extern void *memset (void *s, int c, __SIZE_TYPE__ n);
+
+
+__attribute__ ((noinline))
+     int
+     df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info,
+					int num, unsigned int start)
+{
+  unsigned int m = num;
+  unsigned int offset = 77;
+  unsigned int r;
+
+  for (r = start; r < m; r++)
+    {
+      ref_info->begin[r] = offset;
+      offset += ref_info->count[r];
+      ref_info->count[r] = 0;
+    }
+
+  return offset;
+}
+
+int
+main ()
+{
+  struct df_ref_info temp;
+  int num = 100;
+  unsigned int start = 5;
+  int i, offset;
+
+  temp.begin = malloc (100 * sizeof (unsigned int));
+  temp.count = malloc (100 * sizeof (unsigned int));
+
+  memset (temp.begin, 0, sizeof (unsigned int) * num);
+  memset (temp.count, 0, sizeof (unsigned int) * num);
+
+  for (i = 0; i < num; i++)
+    temp.count[i] = i + 1;
+
+  offset = df_reorganize_refs_by_reg_by_insn (&temp, num, start);
+
+  if (offset != 5112)
+    abort ();
+
+  free (temp.begin);
+  free (temp.count);
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.dg/torture/pr49030.c
+++ b/src/gcc/testsuite/gcc.dg/torture/pr49030.c
@@ -0,0 +1,19 @@
+void
+sample_move_d32u24_sS (char *dst, float *src, unsigned long nsamples,
+		       unsigned long dst_skip)
+{
+  long long y;
+  while (nsamples--)
+    {
+      y = (long long) (*src * 8388608.0f) << 8;
+      if (y > 2147483647) {
+	*(int *) dst = 2147483647;
+      } else if (y < -2147483647 - 1) {
+	*(int *) dst = -2147483647 - 1;
+      } else {
+	*(int *) dst = (int) y;
+      }
+      dst += dst_skip;
+      src++;
+    }
+}
--- a/src/gcc/testsuite/gcc.dg/torture/pr49169.c
+++ b/src/gcc/testsuite/gcc.dg/torture/pr49169.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { arm*-*-* || mips*-*-* } } } */
+
+#include <stdlib.h>
+#include <stdint.h>
+
+int
+main (void)
+{
+  void *p = main;
+  if ((intptr_t) p & 1)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "abort" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-11.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-11.c
@@ -48,7 +48,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
 /* { dg-final { cleanup-tree-dump "slp" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-24.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-24.c
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h,
+          int stride, int dummy)
+{
+  int i;
+  h /= 8;
+  for (i = 0; i < h; i++)
+    {
+      dst[0] += A*src[0];
+      dst[1] += A*src[1];
+      dst[2] += A*src[2];
+      dst[3] += A*src[3];
+      dst[4] += A*src[4];
+      dst[5] += A*src[5];
+      dst[6] += A*src[6];
+      dst[7] += A*src[7];
+      dst += stride;
+      src += stride;
+      if (dummy == 32)
+        abort ();
+    }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      dst[i] = 0;
+      src[i] = i;
+    }
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N; i++)
+    {
+      if (dst[i] != A * i)
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-25.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-25.c
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+    {
+      dst[0] += A*src[0] + src[stride];
+      dst[1] += A*src[1] + src[1+stride];
+      dst[2] += A*src[2] + src[2+stride];
+      dst[3] += A*src[3] + src[3+stride];
+      dst[4] += A*src[4] + src[4+stride];
+      dst[5] += A*src[5] + src[5+stride];
+      dst[6] += A*src[6] + src[6+stride];
+      dst[7] += A*src[7] + src[7+stride];
+      dst += 8;
+      src += 8;
+      if (dummy == 32)
+        abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+       dst[i] = 0;
+       src[i] = i;
+    }
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+    {
+      if (dst[i] != A * i + i + 8)
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-26.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-26.c
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+char src[N], dst[N];
+
+void foo (char * __restrict__ dst, char * __restrict__ src, int h,
+          int stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+    {
+      dst[0] += A*src[0];
+      dst[1] += A*src[1];
+      dst[2] += A*src[2];
+      dst[3] += A*src[3];
+      dst[4] += A*src[4];
+      dst[5] += A*src[5];
+      dst[6] += A*src[6];
+      dst[7] += A*src[7];
+      dst += 8;
+      src += 8;
+      if (dummy == 32)
+        abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+       dst[i] = 0;
+       src[i] = i/8;
+    }
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+    {
+      if (dst[i] != A * src[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect64 } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-27.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-27.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define N 16
+
+short src[N], dst[N];
+
+void foo (int a)
+{
+  dst[0] += a*src[0];
+  dst[1] += a*src[1];
+  dst[2] += a*src[2];
+  dst[3] += a*src[3];
+  dst[4] += a*src[4];
+  dst[5] += a*src[5];
+  dst[6] += a*src[6];
+  dst[7] += a*src[7];
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      dst[i] = 0;
+      src[i] = i;
+    }
+
+  foo (A);
+
+  for (i = 0; i < 8; i++)
+    {
+      if (dst[i] != A * i)
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult && { vect_unpack && vect_pack_trunc } } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-28.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-28.c
@@ -0,0 +1,71 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 300
+#define N 16
+
+char src[N];
+short dst[N];
+short src1[N], dst1[N];
+
+void foo (int a)
+{
+  dst[0] = (short) (a * (int) src[0]);
+  dst[1] = (short) (a * (int) src[1]);
+  dst[2] = (short) (a * (int) src[2]);
+  dst[3] = (short) (a * (int) src[3]);
+  dst[4] = (short) (a * (int) src[4]);
+  dst[5] = (short) (a * (int) src[5]);
+  dst[6] = (short) (a * (int) src[6]);
+  dst[7] = (short) (a * (int) src[7]);
+  dst[8] = (short) (a * (int) src[8]);
+  dst[9] = (short) (a * (int) src[9]);
+  dst[10] = (short) (a * (int) src[10]);
+  dst[11] = (short) (a * (int) src[11]);
+  dst[12] = (short) (a * (int) src[12]);
+  dst[13] = (short) (a * (int) src[13]);
+  dst[14] = (short) (a * (int) src[14]);
+  dst[15] = (short) (a * (int) src[15]);
+
+  dst1[0] += src1[0];
+  dst1[1] += src1[1];
+  dst1[2] += src1[2];
+  dst1[3] += src1[3];
+  dst1[4] += src1[4];
+  dst1[5] += src1[5];
+  dst1[6] += src1[6];
+  dst1[7] += src1[7];
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      dst[i] = 2;
+      dst1[i] = 0;
+      src[i] = i;
+      src1[i] = i+2;
+    }
+
+  foo (A);
+
+  for (i = 0; i < N; i++)
+    {
+      if (dst[i] != A * i
+          || (i < N/2 && dst1[i] != i + 2))
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_int_mult &&  { vect_pack_trunc && vect_unpack } } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-29.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-29.c
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+    {
+      dst[0] = A*src[0] + B*src[1];
+      dst[1] = A*src[1] + B*src[2];
+      dst[2] = A*src[2] + B*src[3];
+      dst[3] = A*src[3] + B*src[4];
+      dst[4] = A*src[4] + B*src[5];
+      dst[5] = A*src[5] + B*src[6];
+      dst[6] = A*src[6] + B*src[7];
+      dst[7] = A*src[7] + B*src[8];
+      dst += stride;
+      src += stride;
+      if (dummy == 32)
+        abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+       dst[i] = 0;
+       src[i] = i;
+    }
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+    {
+      if (dst[i] != A * src[i] + B * src[i+1])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp"  { target { vect_int_mult &&  vect_element_align } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__((noinline, noclone)) void
+foo (int *a, int stride)
+{
+  int i;
+
+  for (i = 0; i < N/stride; i++, a += stride)
+   {
+     a[0] = a[0] ? 1 : 5;
+     a[1] = a[1] ? 2 : 6;
+     a[2] = a[2] ? 3 : 7;
+     a[3] = a[3] ? 4 : 8;
+   }
+}
+
+
+int a[N];
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    a[i] = i;
+
+  foo (a, 4);
+
+  for (i = 1; i < N; i++)
+    if (a[i] != i%4 + 1)
+      abort ();
+
+  if (a[0] != 5)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-cond-3.c
@@ -0,0 +1,85 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 64 
+
+/* Comparison in int, then/else and result in unsigned char.  */ 
+
+static inline unsigned char
+foo (int x, int y, int a, int b)
+{
+  if (x >= y)
+    return a;
+  else
+    return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (unsigned char * __restrict__ a, unsigned char * __restrict__ b,
+     unsigned char * __restrict__ c, unsigned char * __restrict__ d,
+     unsigned char * __restrict__ e, int stride, int w)
+{
+  int i;
+  for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
+				d += stride, e += stride)
+    {
+      e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
+      e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
+      e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
+      e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
+      e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
+      e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
+      e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
+      e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
+      e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
+      e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
+      e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
+      e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
+      e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
+      e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
+      e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
+      e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
+    }
+}
+
+
+unsigned char a[N], b[N], c[N], d[N], e[N];
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = 5;
+      e[i] = 0;
+
+      switch (i % 9)
+        {
+        case 0: asm (""); c[i] = i; d[i] = i + 1; break;
+        case 1: c[i] = 0; d[i] = 0; break;
+        case 2: c[i] = i + 1; d[i] = i - 1; break;
+        case 3: c[i] = i; d[i] = i + 7; break;
+        case 4: c[i] = i; d[i] = i; break;
+        case 5: c[i] = i + 16; d[i] = i + 3; break;
+        case 6: c[i] = i - 5; d[i] = i; break;
+        case 7: c[i] = i; d[i] = i; break;
+        case 8: c[i] = i; d[i] = i - 7; break;
+        }
+    }
+
+  bar (a, b, c, d, e, 16, 2);
+  for (i = 0; i < N; i++)
+    if (e[i] != ((i % 3) == 0 ? 10 : 2 * i))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_int_mult } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-cond-4.c
@@ -0,0 +1,85 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 64 
+
+/* Comparison in short, then/else and result in int.  */
+static inline int
+foo (short x, short y, int a, int b)
+{
+  if (x >= y)
+    return a;
+  else
+    return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (short * __restrict__ a, short * __restrict__ b,
+     short * __restrict__ c, short * __restrict__ d,
+     int * __restrict__ e, int stride, int w)
+{
+  int i;
+  for (i = 0; i < N/stride; i++, a += stride, b += stride, c += stride,
+				d += stride, e += stride)
+    {
+      e[0] = foo (c[0], d[0], a[0], b[0]);
+      e[1] = foo (c[1], d[1], a[1], b[1]);
+      e[2] = foo (c[2], d[2], a[2], b[2]);
+      e[3] = foo (c[3], d[3], a[3], b[3]);
+      e[4] = foo (c[4], d[4], a[4], b[4]);
+      e[5] = foo (c[5], d[5], a[5], b[5]);
+      e[6] = foo (c[6], d[6], a[6], b[6]);
+      e[7] = foo (c[7], d[7], a[7], b[7]);
+      e[8] = foo (c[8], d[8], a[8], b[8]);
+      e[9] = foo (c[9], d[9], a[9], b[9]);
+      e[10] = foo (c[10], d[10], a[10], b[10]);
+      e[11] = foo (c[11], d[11], a[11], b[11]);
+      e[12] = foo (c[12], d[12], a[12], b[12]);
+      e[13] = foo (c[13], d[13], a[13], b[13]);
+      e[14] = foo (c[14], d[14], a[14], b[14]);
+      e[15] = foo (c[15], d[15], a[15], b[15]);
+    }
+}
+
+
+short a[N], b[N], c[N], d[N];
+int e[N];
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = 5;
+      e[i] = 0;
+
+      switch (i % 9)
+        {
+        case 0: asm (""); c[i] = - i - 1; d[i] = i + 1; break;
+        case 1: c[i] = 0; d[i] = 0; break;
+        case 2: c[i] = i + 1; d[i] = - i - 1; break;
+        case 3: c[i] = i; d[i] = i + 7; break;
+        case 4: c[i] = i; d[i] = i; break;
+        case 5: c[i] = i + 16; d[i] = i + 3; break;
+        case 6: c[i] = - i - 5; d[i] = - i; break;
+        case 7: c[i] = - i; d[i] = - i; break;
+        case 8: c[i] = - i; d[i] = - i - 7; break;
+        }
+    }
+
+  bar (a, b, c, d, e, 16, 2);
+  for (i = 0; i < N; i++)
+    if (e[i] != ((i % 3) == 0 ? 5 : i))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_element_align } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+unsigned short X[N];
+unsigned short Y[N];
+unsigned int result[N];
+
+/* unsigned short->unsigned int widening-mult.  */
+__attribute__ ((noinline, noclone)) void 
+foo (void)
+{
+  result[0] = (unsigned int)(X[0] * Y[0]);
+  result[1] = (unsigned int)(X[1] * Y[1]);
+  result[2] = (unsigned int)(X[2] * Y[2]);
+  result[3] = (unsigned int)(X[3] * Y[3]);
+  result[4] = (unsigned int)(X[4] * Y[4]);
+  result[5] = (unsigned int)(X[5] * Y[5]);
+  result[6] = (unsigned int)(X[6] * Y[6]);
+  result[7] = (unsigned int)(X[7] * Y[7]);
+}
+
+int main (void)
+{
+  int i, tmp;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      X[i] = i;
+      Y[i] = 64-i;
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    {
+      __asm__ volatile ("");
+      tmp = X[i] * Y[i];
+      if (result[i] != tmp)
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 8 "slp" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+__attribute__((noinline, noclone)) void
+foo (short * __restrict__ a, int * __restrict__ b, int stride)
+{
+  int i;
+
+  for (i = 0; i < N/stride; i++, a += stride, b += stride)
+   {
+     a[0] = b[0] ? 1 : 7;
+     a[1] = b[1] ? 2 : 0;
+     a[2] = b[2] ? 3 : 0;
+     a[3] = b[3] ? 4 : 0;
+     a[4] = b[4] ? 5 : 0;
+     a[5] = b[5] ? 6 : 0;
+     a[6] = b[6] ? 7 : 0;
+     a[7] = b[7] ? 8 : 0;
+   }
+}
+
+short a[N];
+int b[N];
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+    }
+
+  foo (a, b, 8);
+
+  for (i = 1; i < N; i++)
+    if (a[i] != i%8 + 1)
+      abort ();
+
+  if (a[0] != 7)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target { vect_element_align && vect_pack_trunc } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c
+++ b/src/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-12.c
@@ -113,7 +113,7 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  {target { vect_strided && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_strided && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  {target { vect_strided8 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_strided8 && vect_int_mult } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c
+++ b/src/gcc/testsuite/gcc.dg/vect/fast-math-pr35982.c
@@ -20,7 +20,7 @@
   return avg;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide  } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide  } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd  } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd  } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c
+++ b/src/gcc/testsuite/gcc.dg/vect/fast-math-slp-27.c
@@ -13,5 +13,5 @@
    }
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_strided } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/fast-math-vect-complex-3.c
@@ -56,5 +56,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave  && vect_extract_even_odd_wide } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c
+++ b/src/gcc/testsuite/gcc.dg/vect/if-cvt-stores-vect-ifcvt-18.c
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+  short a;
+  short b;
+} data;
+
+data in1[N], in2[N], out[N];
+short result[N*2] = {10,-7,11,-6,12,-5,13,-4,14,-3,15,-2,16,-1,17,0,18,1,19,2,20,3,21,4,22,5,23,6,24,7,25,8,26,9,27,10,28,11,29,12,30,13,31,14,32,15,33,16,34,17,35,18,36,19,37,20,38,21,39,22,40,23,41,24,42,25,43,26,44,27,45,28,46,29,47,30,48,31,49,32,50,33,51,34,52,35,53,36,54,37,55,38,56,39,57,40,58,41,59,42};
+short out1[N], out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+  short c, d;
+
+  for (i = 0; i < N; i++)
+    {
+      c = in1[i].b;
+      d = in2[i].b;
+
+      if (c >= d)
+        {
+          out[i].b = in1[i].a;
+          out[i].a = d + 5;
+        }
+      else
+        {
+          out[i].b = d - 12;
+          out[i].a = in2[i].a + d;
+        }
+    }
+}
+
+int
+main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in1[i].a = i;
+      in1[i].b = i + 2;
+      in2[i].a = 5;
+      in2[i].b = i + 5;
+      __asm__ volatile ("");
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail { vect_no_align || { ! vect_strided2 } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-fre-pre-pr50208.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+char c;
+int a, b;
+
+void foo (int j)
+{
+  int i;
+  while (--j)
+    {
+      b = 3;
+      for (i = 0; i < 2; ++i)
+        a = b ^ c;
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c
@@ -54,5 +54,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c
@@ -53,5 +53,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c
@@ -47,5 +47,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_interleave } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave || vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c
@@ -50,5 +50,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-tree-fre-pr50039.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+extern unsigned char g_5;
+extern int g_31, g_76;
+int main(void) {
+ int i, j;
+    for (j=0; j < 2; ++j) {
+        g_31 = -3;
+        for (i=0; i < 2; ++i)
+          g_76 = (g_31 ? g_31+1 : 0) ^ g_5;
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-pr29145.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
@@ -8,7 +7,7 @@
 void with_restrict(int * __restrict p)
 {
   int i;
-  int *q = p - 2;
+  int *q = p - 1;
 
   for (i = 0; i < 1000; ++i) {
     p[i] = q[i];
@@ -19,7 +18,7 @@
 void without_restrict(int * p)
 {
   int i;
-  int *q = p - 2;
+  int *q = p - 1;
 
   for (i = 0; i < 1000; ++i) {
     p[i] = q[i];
@@ -38,8 +37,8 @@
     a[i] = b[i] = i;
   }
 
-  with_restrict(a + 2);
-  without_restrict(b + 2);
+  with_restrict(a + 1);
+  without_restrict(b + 1);
 
   for (i = 0; i < 1002; ++i) {
     if (a[i] != b[i])
--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-101.c
@@ -45,6 +45,7 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102a.c
@@ -53,6 +53,7 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-102.c
@@ -53,6 +53,7 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-37.c
@@ -58,5 +58,6 @@
    If/when the aliasing problems are resolved, unalignment may
    prevent vectorization on some targets.  */
 /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c
+++ b/src/gcc/testsuite/gcc.dg/vect/no-vfa-vect-79.c
@@ -46,5 +46,6 @@
   If/when the aliasing problems are resolved, unalignment may
   prevent vectorization on some targets.  */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c
@@ -26,7 +26,7 @@
     }
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided_wide } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided_wide } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided4 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/pr30843.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr30843.c
@@ -20,6 +20,6 @@
     }
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided4 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/pr30858.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr30858.c
@@ -11,5 +11,6 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "Unknown def-use cycle pattern." 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/pr33866.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr33866.c
@@ -27,6 +27,6 @@
 }
 
 /* Needs interleaving support.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/pr37539.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr37539.c
@@ -40,7 +40,7 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_strided_wide } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_strided4 && vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
 
--- a/src/gcc/testsuite/gcc.dg/vect/pr50014.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr50014.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int f(unsigned char *s, int n)
+{
+  int sum = 0;
+  int i;
+
+  for (i = 0; i < n; i++)
+    sum += 256 * s[i];
+
+  return sum;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/pr51301.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr51301.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+typedef signed char int8_t;
+typedef signed long long int64_t;
+int64_t
+f0a (int8_t * __restrict__ arg1)
+{
+  int idx;
+  int64_t result = 0;
+  for (idx = 0; idx < 416; idx += 1)
+    result += arg1[idx] << (arg1[idx] == arg1[idx]);
+  return result;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/pr51799.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr51799.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned long uint32_t;
+void
+f0a (uint32_t * __restrict__ result, int8_t * __restrict__ arg1,
+     uint32_t * __restrict__ arg4, int8_t temp_6)
+{
+  int idx;
+  for (idx = 0; idx < 416; idx += 1)
+    {
+      result[idx] = (uint8_t)(((arg1[idx] << 7) + arg4[idx]) * temp_6);
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/pr52870.c
+++ b/src/gcc/testsuite/gcc.dg/vect/pr52870.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -ftree-vectorize" } */
+
+long
+test (int *x)
+{
+  unsigned long sx, xprec;
+
+  sx = *x >= 0 ? *x : -*x;
+
+  xprec = sx * 64;
+
+  if (sx < 16384)
+    foo (sx);
+
+  return xprec;
+}
--- a/src/gcc/testsuite/gcc.dg/vect/slp-11a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-11a.c
@@ -0,0 +1,75 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+  /* Different operations - not SLPable.  */
+  for (i = 0; i < N; i++)
+    {
+      a0 = in[i*8] + 5;
+      a1 = in[i*8 + 1] * 6;
+      a2 = in[i*8 + 2] + 7;
+      a3 = in[i*8 + 3] + 8;
+      a4 = in[i*8 + 4] + 9;
+      a5 = in[i*8 + 5] + 10;
+      a6 = in[i*8 + 6] + 11;
+      a7 = in[i*8 + 7] + 12;
+
+      b0 = a0 * 3;
+      b1 = a1 * 2;
+      b2 = a2 * 12;
+      b3 = a3 * 5;
+      b4 = a4 * 8;
+      b5 = a5 * 4;
+      b6 = a6 * 3;
+      b7 = a7 * 2;
+
+      out[i*8] = b0 - 2;
+      out[i*8 + 1] = b1 - 3;
+      out[i*8 + 2] = b2 - 2;
+      out[i*8 + 3] = b3 - 1;
+      out[i*8 + 4] = b4 - 8;
+      out[i*8 + 5] = b5 - 7;
+      out[i*8 + 6] = b6 - 3;
+      out[i*8 + 7] = b7 - 7;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (out[i*8] !=  (in[i*8] + 5) * 3 - 2
+         || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
+         || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
+         || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
+         || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
+         || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7
+         || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3
+         || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7)
+	abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-11b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-11b.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+  /* Requires permutation - not SLPable.  */
+  for (i = 0; i < N*2; i++)
+    {
+      out[i*4] = (in[i*4] + 2) * 3;
+      out[i*4 + 1] = (in[i*4 + 2] + 2) * 7;
+      out[i*4 + 2] = (in[i*4 + 1] + 7) * 3;
+      out[i*4 + 3] = (in[i*4 + 3] + 3) * 4;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*2; i++)
+    {
+      if (out[i*4] !=  (in[i*4] + 2) * 3
+         || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7
+         || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3
+         || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided4 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided4 && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-11.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-11.c
@@ -1,113 +0,0 @@
-/* { dg-require-effective-target vect_int } */
-
-#include <stdarg.h>
-#include "tree-vect.h"
-
-#define N 8 
-
-int
-main1 ()
-{
-  int i;
-  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
-  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  float out2[N*8];
-
-  /* Different operations - not SLPable.  */
-  for (i = 0; i < N; i++)
-    {
-      a0 = in[i*8] + 5;
-      a1 = in[i*8 + 1] * 6;
-      a2 = in[i*8 + 2] + 7;
-      a3 = in[i*8 + 3] + 8;
-      a4 = in[i*8 + 4] + 9;
-      a5 = in[i*8 + 5] + 10;
-      a6 = in[i*8 + 6] + 11;
-      a7 = in[i*8 + 7] + 12;
-
-      b0 = a0 * 3;
-      b1 = a1 * 2;
-      b2 = a2 * 12;
-      b3 = a3 * 5;
-      b4 = a4 * 8;
-      b5 = a5 * 4;
-      b6 = a6 * 3;
-      b7 = a7 * 2;
-
-      out[i*8] = b0 - 2;
-      out[i*8 + 1] = b1 - 3; 
-      out[i*8 + 2] = b2 - 2;
-      out[i*8 + 3] = b3 - 1;
-      out[i*8 + 4] = b4 - 8;
-      out[i*8 + 5] = b5 - 7;
-      out[i*8 + 6] = b6 - 3;
-      out[i*8 + 7] = b7 - 7;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N; i++)
-    {
-      if (out[i*8] !=  (in[i*8] + 5) * 3 - 2
-         || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
-         || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
-         || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
-         || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
-         || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7
-         || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3
-         || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7)
-	abort ();
-    }
-
-  /* Requires permutation - not SLPable.  */
-  for (i = 0; i < N*2; i++)
-    {
-      out[i*4] = (in[i*4] + 2) * 3;
-      out[i*4 + 1] = (in[i*4 + 2] + 2) * 7;
-      out[i*4 + 2] = (in[i*4 + 1] + 7) * 3;
-      out[i*4 + 3] = (in[i*4 + 3] + 3) * 4;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*2; i++)
-    {
-      if (out[i*4] !=  (in[i*4] + 2) * 3
-         || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7
-         || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3
-         || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4)
-        abort ();
-    }
-
-  /* Different operations - not SLPable.  */
-  for (i = 0; i < N*4; i++)
-    {
-      out2[i*2] = ((float) in[i*2] * 2 + 6) ;
-      out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7);
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*4; i++)
-    {
-      if (out2[i*2] !=  ((float) in[i*2] * 2 + 6)
-         || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7))
-        abort ();
-    }
-
-
-  return 0;
-}
-
-int main (void)
-{
-  check_vect ();
-
-  main1 ();
-
-  return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  { target { { vect_uintfloat_cvt && vect_strided_wide } &&  vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  { target { { { ! vect_uintfloat_cvt } && vect_strided_wide } &&  vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! { vect_int_mult && vect_strided_wide } } } } }  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0  "vect"  } } */
-/* { dg-final { cleanup-tree-dump "vect" } } */
-  
--- a/src/gcc/testsuite/gcc.dg/vect/slp-11c.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-11c.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  float out[N*8];
+
+  /* Different operations - not SLPable.  */
+  for (i = 0; i < N*4; i++)
+    {
+      out[i*2] = ((float) in[i*2] * 2 + 6) ;
+      out[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7);
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*4; i++)
+    {
+      if (out[i*2] !=  ((float) in[i*2] * 2 + 6)
+         || out[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7))
+        abort ();
+    }
+
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { { vect_uintfloat_cvt && vect_strided2 } && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0  "vect"  } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-12a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-12a.c
@@ -11,7 +11,7 @@
   int i;
   unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
   unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  unsigned int ia[N], ib[N*2];
+  unsigned int ia[N];
 
   for (i = 0; i < N; i++)
     {
@@ -61,27 +61,6 @@
 	abort ();
     }
 
-  for (i = 0; i < N*2; i++)
-    {
-      out[i*4] = (in[i*4] + 2) * 3;
-      out[i*4 + 1] = (in[i*4 + 1] + 2) * 7;
-      out[i*4 + 2] = (in[i*4 + 2] + 7) * 3;
-      out[i*4 + 3] = (in[i*4 + 3] + 7) * 7;
-
-      ib[i] = 7;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*2; i++)
-    {
-      if (out[i*4] !=  (in[i*4] + 2) * 3
-         || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7
-         || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3
-         || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7 
-         || ib[i] != 7)
-        abort ();
-    }
-
   return 0;
 }
 
@@ -94,11 +73,8 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  {target { vect_strided_wide && vect_int_mult} } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {target { {! {vect_strided_wide}} && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided_wide && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided_wide}} && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target  { ! vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
-  
--- a/src/gcc/testsuite/gcc.dg/vect/slp-12b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-12b.c
@@ -43,9 +43,9 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {target { vect_strided_wide && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target { { ! { vect_int_mult }} || { ! {vect_strided_wide}}} } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  {target { vect_strided_wide && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  {target { { ! { vect_int_mult }} || { ! {vect_strided_wide}}} } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_strided2 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  { target { ! { vect_strided2 && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target { vect_strided2 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided2 && vect_int_mult } } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/slp-12c.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-12c.c
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2];
+
+  for (i = 0; i < N*2; i++)
+    {
+      out[i*4] = (in[i*4] + 2) * 3;
+      out[i*4 + 1] = (in[i*4 + 1] + 2) * 7;
+      out[i*4 + 2] = (in[i*4 + 2] + 7) * 3;
+      out[i*4 + 3] = (in[i*4 + 3] + 7) * 7;
+
+      ia[i] = 7;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*2; i++)
+    {
+      if (out[i*4] !=  (in[i*4] + 2) * 3
+         || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7
+         || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3
+         || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7
+         || ia[i] != 7)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  { target { ! vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-18.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-18.c
@@ -91,7 +91,7 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/slp-19a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-19a.c
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 ()
+{
+  unsigned int i;
+  unsigned int out[N*8];
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2];
+
+  for (i = 0; i < N; i++)
+    {
+      out[i*8] = in[i*8];
+      out[i*8 + 1] = in[i*8 + 1];
+      out[i*8 + 2] = in[i*8 + 2];
+      out[i*8 + 3] = in[i*8 + 3];
+      out[i*8 + 4] = in[i*8 + 4];
+      out[i*8 + 5] = in[i*8 + 5];
+      out[i*8 + 6] = in[i*8 + 6];
+      out[i*8 + 7] = in[i*8 + 7];
+
+      ia[i] = in[i*8 + 2];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (out[i*8] !=  in[i*8]
+         || out[i*8 + 1] != in[i*8 + 1]
+         || out[i*8 + 2] != in[i*8 + 2]
+         || out[i*8 + 3] != in[i*8 + 3]
+         || out[i*8 + 4] != in[i*8 + 4]
+         || out[i*8 + 5] != in[i*8 + 5]
+         || out[i*8 + 6] != in[i*8 + 6]
+         || out[i*8 + 7] != in[i*8 + 7]
+         || ia[i] != in[i*8 + 2])
+	abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided8 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided8 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided8} } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-19b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-19b.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 ()
+{
+  unsigned int i;
+  unsigned int out[N*8];
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2], a0, a1, a2, a3;
+
+  for (i = 0; i < N*2; i++)
+    {
+      a0 = in[i*4] + 1;
+      a1 = in[i*4 + 1] + 2;
+      a2 = in[i*4 + 2] + 3;
+      a3 = in[i*4 + 3] + 4;
+
+      out[i*4] = a0;
+      out[i*4 + 1] = a1;
+      out[i*4 + 2] = a2;
+      out[i*4 + 3] = a3;
+
+      ia[i] = a2;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*2; i++)
+    {
+      if (out[i*4] !=  in[i*4] + 1
+         || out[i*4 + 1] != in[i*4 + 1] + 2
+         || out[i*4 + 2] != in[i*4 + 2] + 3
+         || out[i*4 + 3] != in[i*4 + 3] + 4
+         || ia[i] != in[i*4 + 2] + 3)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-19.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-19.c
@@ -1,154 +0,0 @@
-/* { dg-require-effective-target vect_int } */
-
-#include <stdarg.h>
-#include "tree-vect.h"
-
-#define N 16 
-
-int
-main1 ()
-{
-  unsigned int i;
-  unsigned int out[N*8];
-  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  unsigned int ia[N*2], a0, a1, a2, a3;
-
-  for (i = 0; i < N; i++)
-    {
-      out[i*8] = in[i*8];
-      out[i*8 + 1] = in[i*8 + 1];
-      out[i*8 + 2] = in[i*8 + 2];
-      out[i*8 + 3] = in[i*8 + 3];
-      out[i*8 + 4] = in[i*8 + 4];
-      out[i*8 + 5] = in[i*8 + 5];
-      out[i*8 + 6] = in[i*8 + 6];
-      out[i*8 + 7] = in[i*8 + 7];
-    
-      ia[i] = in[i*8 + 2];
-    }
-
-  /* check results:  */
-  for (i = 0; i < N; i++)
-    {
-      if (out[i*8] !=  in[i*8]
-         || out[i*8 + 1] != in[i*8 + 1]
-         || out[i*8 + 2] != in[i*8 + 2]
-         || out[i*8 + 3] != in[i*8 + 3]
-         || out[i*8 + 4] != in[i*8 + 4]
-         || out[i*8 + 5] != in[i*8 + 5]
-         || out[i*8 + 6] != in[i*8 + 6]
-         || out[i*8 + 7] != in[i*8 + 7]
-         || ia[i] != in[i*8 + 2])
-	abort ();
-    }
-
-  for (i = 0; i < N*2; i++)
-    {
-      a0 = in[i*4] + 1;
-      a1 = in[i*4 + 1] + 2;
-      a2 = in[i*4 + 2] + 3;
-      a3 = in[i*4 + 3] + 4;
-
-      out[i*4] = a0;
-      out[i*4 + 1] = a1;
-      out[i*4 + 2] = a2;
-      out[i*4 + 3] = a3;
-
-      ia[i] = a2;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*2; i++)
-    {
-      if (out[i*4] !=  in[i*4] + 1
-         || out[i*4 + 1] != in[i*4 + 1] + 2
-         || out[i*4 + 2] != in[i*4 + 2] + 3
-         || out[i*4 + 3] != in[i*4 + 3] + 4
-         || ia[i] != in[i*4 + 2] + 3)
-        abort ();
-    }
-
-  /* The last stmt requires interleaving of not power of 2 size - not 
-     vectorizable.  */
-  for (i = 0; i < N/2; i++)
-    {
-      out[i*12] = in[i*12];
-      out[i*12 + 1] = in[i*12 + 1];
-      out[i*12 + 2] = in[i*12 + 2];
-      out[i*12 + 3] = in[i*12 + 3];
-      out[i*12 + 4] = in[i*12 + 4];
-      out[i*12 + 5] = in[i*12 + 5];
-      out[i*12 + 6] = in[i*12 + 6];
-      out[i*12 + 7] = in[i*12 + 7];
-      out[i*12 + 8] = in[i*12 + 8];
-      out[i*12 + 9] = in[i*12 + 9];
-      out[i*12 + 10] = in[i*12 + 10];
-      out[i*12 + 11] = in[i*12 + 11];
-
-      ia[i] = in[i*12 + 7];
-    }
-
-  /* check results:  */
-  for (i = 0; i < N/2; i++)
-    {
-      if (out[i*12] !=  in[i*12]
-         || out[i*12 + 1] != in[i*12 + 1]
-         || out[i*12 + 2] != in[i*12 + 2]
-         || out[i*12 + 3] != in[i*12 + 3]
-         || out[i*12 + 4] != in[i*12 + 4]
-         || out[i*12 + 5] != in[i*12 + 5]
-         || out[i*12 + 6] != in[i*12 + 6]
-         || out[i*12 + 7] != in[i*12 + 7]
-         || out[i*12 + 8] != in[i*12 + 8]
-         || out[i*12 + 9] != in[i*12 + 9]
-         || out[i*12 + 10] != in[i*12 + 10]
-         || out[i*12 + 11] != in[i*12 + 11]
-         || ia[i] != in[i*12 + 7])
-        abort ();
-    }
-
-  /* Hybrid SLP with unrolling by 2.  */
-  for (i = 0; i < N; i++)
-    {
-      out[i*6] = in[i*6];
-      out[i*6 + 1] = in[i*6 + 1];
-      out[i*6 + 2] = in[i*6 + 2];
-      out[i*6 + 3] = in[i*6 + 3];
-      out[i*6 + 4] = in[i*6 + 4];
-      out[i*6 + 5] = in[i*6 + 5];
-    
-      ia[i] = i;
-    } 
-    
-  /* check results:  */
-  for (i = 0; i < N/2; i++)
-    {
-      if (out[i*6] !=  in[i*6]
-         || out[i*6 + 1] != in[i*6 + 1]
-         || out[i*6 + 2] != in[i*6 + 2]
-         || out[i*6 + 3] != in[i*6 + 3]
-         || out[i*6 + 4] != in[i*6 + 4]
-         || out[i*6 + 5] != in[i*6 + 5]
-         || ia[i] != i)
-        abort ();
-    }
-
-
-  return 0;
-}
-
-int main (void)
-{
-  check_vect ();
-
-  main1 ();
-
-  return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target  vect_strided_wide  } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target  { ! { vect_strided_wide } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"  { target  vect_strided_wide  } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target { ! { vect_strided_wide } } } } } */
-/* { dg-final { cleanup-tree-dump "vect" } } */
-  
--- a/src/gcc/testsuite/gcc.dg/vect/slp-19c.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-19c.c
@@ -0,0 +1,95 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 ()
+{
+  unsigned int i;
+  unsigned int out[N*8];
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2], a0, a1, a2, a3;
+
+  /* The last stmt requires interleaving of not power of 2 size - not
+     vectorizable.  */
+  for (i = 0; i < N/2; i++)
+    {
+      out[i*12] = in[i*12];
+      out[i*12 + 1] = in[i*12 + 1];
+      out[i*12 + 2] = in[i*12 + 2];
+      out[i*12 + 3] = in[i*12 + 3];
+      out[i*12 + 4] = in[i*12 + 4];
+      out[i*12 + 5] = in[i*12 + 5];
+      out[i*12 + 6] = in[i*12 + 6];
+      out[i*12 + 7] = in[i*12 + 7];
+      out[i*12 + 8] = in[i*12 + 8];
+      out[i*12 + 9] = in[i*12 + 9];
+      out[i*12 + 10] = in[i*12 + 10];
+      out[i*12 + 11] = in[i*12 + 11];
+
+      ia[i] = in[i*12 + 7];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N/2; i++)
+    {
+      if (out[i*12] !=  in[i*12]
+         || out[i*12 + 1] != in[i*12 + 1]
+         || out[i*12 + 2] != in[i*12 + 2]
+         || out[i*12 + 3] != in[i*12 + 3]
+         || out[i*12 + 4] != in[i*12 + 4]
+         || out[i*12 + 5] != in[i*12 + 5]
+         || out[i*12 + 6] != in[i*12 + 6]
+         || out[i*12 + 7] != in[i*12 + 7]
+         || out[i*12 + 8] != in[i*12 + 8]
+         || out[i*12 + 9] != in[i*12 + 9]
+         || out[i*12 + 10] != in[i*12 + 10]
+         || out[i*12 + 11] != in[i*12 + 11]
+         || ia[i] != in[i*12 + 7])
+        abort ();
+    }
+
+  /* Hybrid SLP with unrolling by 2.  */
+  for (i = 0; i < N; i++)
+    {
+      out[i*6] = in[i*6];
+      out[i*6 + 1] = in[i*6 + 1];
+      out[i*6 + 2] = in[i*6 + 2];
+      out[i*6 + 3] = in[i*6 + 3];
+      out[i*6 + 4] = in[i*6 + 4];
+      out[i*6 + 5] = in[i*6 + 5];
+
+      ia[i] = i;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N/2; i++)
+    {
+      if (out[i*6] !=  in[i*6]
+         || out[i*6 + 1] != in[i*6 + 1]
+         || out[i*6 + 2] != in[i*6 + 2]
+         || out[i*6 + 3] != in[i*6 + 3]
+         || out[i*6 + 4] != in[i*6 + 4]
+         || out[i*6 + 5] != in[i*6 + 5]
+         || ia[i] != i)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-21.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-21.c
@@ -199,9 +199,9 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  { target { vect_strided || vect_extract_even_odd } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target  { ! { vect_strided || vect_extract_even_odd } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided }  } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  { target { vect_strided4 || vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target  { ! { vect_strided4 || vect_extract_even_odd } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 }  } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided4 } } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/slp-23.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-23.c
@@ -106,8 +106,8 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided_wide } && {! { vect_no_align} } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided_wide || vect_no_align} } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { vect_strided8 && { ! { vect_no_align} } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided8 || vect_no_align } } } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/slp-25.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-25.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/slp-3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-3.c
@@ -1,12 +1,11 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
 
-#define N 8 
+#define N 12 
 
-unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
 
 int
 main1 ()
@@ -101,7 +100,7 @@
     }
 
   /* SLP with unrolling by 8.  */
-  for (i = 0; i < N/2; i++)
+  for (i = 0; i < N/4; i++)
     {
       out[i*9] = in[i*9];
       out[i*9 + 1] = in[i*9 + 1];
@@ -115,7 +114,7 @@
     }
 
   /* check results:  */
-  for (i = 0; i < N/2; i++)
+  for (i = 0; i < N/4; i++)
     {
       if (out[i*9] !=  in[i*9]
          || out[i*9 + 1] != in[i*9 + 1]
--- a/src/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-cond-1.c
@@ -0,0 +1,126 @@
+/* { dg-require-effective-target vect_condition } */
+#include "tree-vect.h"
+
+#define N 32
+int a[N], b[N];
+int d[N], e[N];
+int k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N/4; i++)
+    {
+      k[4*i] = a[4*i] < b[4*i] ? 17 : 0;
+      k[4*i+1] = a[4*i+1] < b[4*i+1] ? 17 : 0;
+      k[4*i+2] = a[4*i+2] < b[4*i+2] ? 17 : 0;
+      k[4*i+3] = a[4*i+3] < b[4*i+3] ? 17 : 0;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N/2; ++i)
+    {
+      k[2*i] = a[2*i] < b[2*i] ? 0 : 24;
+      k[2*i+1] = a[2*i+1] < b[2*i+1] ? 7 : 4;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N/2; ++i)
+    {
+      k[2*i] = a[2*i] < b[2*i] ? 51 : 12;
+      k[2*i+1] = a[2*i+1] > b[2*i+1] ? 51 : 12;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N/2; ++i)
+    {
+      int d0 = d[2*i], e0 = e[2*i];
+      int d1 = d[2*i+1], e1 = e[2*i+1];
+      k[2*i] = a[2*i] >= b[2*i] ? d0 : e0;
+      k[2*i+1] = a[2*i+1] >= b[2*i+1] ? d1 : e1;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+      d[i] = i;
+      e[i] = 2 * i;
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 17 : 0))
+      abort ();
+
+  f2 ();
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+        {
+        case 0:
+	case 6:
+	  if (k[i] != ((i/9 % 2) == 0 ? 0 : 7))
+	    abort ();
+	  break;
+        case 1:
+        case 5:
+        case 7:
+	  if (k[i] != ((i/9 % 2) == 0 ? 4 : 24))
+            abort ();
+          break;
+        case 2:
+        case 4:
+        case 8:
+	  if (k[i] != ((i/9 % 2) == 0 ? 24 : 4))
+            abort ();
+          break;
+        case 3:
+	  if (k[i] != ((i/9 % 2) == 0 ? 7 : 0))
+            abort ();
+          break;
+        }
+    }
+
+  f3 ();
+
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? e[i] : d[i]))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
@@ -42,7 +42,7 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! vect_unpack } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! { vect_unpack || vect_strided2 } } } } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
 /* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+++ b/src/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+#define COEF 32470
+#define COEF2 324700
+
+unsigned char in[N];
+int out[N];
+int out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+
+  for (i = 0; i < N/2; i++)
+    {
+      out[2*i] = in[2*i] * COEF;
+      out2[2*i] = in[2*i] + COEF2;
+      out[2*i+1] = in[2*i+1] * COEF;
+      out2[2*i+1] = in[2*i+1] + COEF2;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-104.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-104.c
@@ -64,6 +64,7 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-107.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-107.c
@@ -40,6 +40,6 @@
   return main1 ();
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-109.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-109.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-10.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-10.c
@@ -22,5 +22,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-119.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-119.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+
+#define OUTER 32
+#define INNER 40
+
+static unsigned int
+bar (const unsigned int x[INNER][2], unsigned int sum)
+{
+  int i;
+
+  for (i = 0; i < INNER; i++)
+    sum += x[i][0] * x[i][0] + x[i][1] * x[i][1];
+  return sum;
+}
+
+unsigned int foo (const unsigned int x[OUTER][INNER][2])
+{
+  int i;
+  unsigned int sum;
+
+  sum = 0.0f;
+  for (i = 0; i < OUTER; i++)
+    sum = bar (x[i], sum);
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "Detected interleaving of size 2" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-1.c
@@ -85,6 +85,6 @@
   fbar (a);
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_extract_even_odd_wide } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_extract_even_odd_wide } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 6 loops" 1 "vect" { target vect_strided2 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 5 loops" 1 "vect" { xfail vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-40.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-40.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-42.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-42.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-46.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-46.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-48.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-48.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-52.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-52.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-54.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-54.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-96.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-96.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-98.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-98.c
@@ -38,6 +38,6 @@
 }
 
 /* Needs interleaving support.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail  { vect_interleave && vect_extract_even_odd_wide } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail  vect_strided4 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-cond-8a.c
@@ -0,0 +1,75 @@
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 1024
+int a[N], b[N], c[N];
+char d[N], e[N], f[N];
+unsigned char k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = a[i] < b[i] ? 17 : 0;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = a[i] < b[i] ? 0 : 24;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = a[i] < b[i] ? 51 : 12;
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+      d[i] = i;
+      e[i] = 2 * i;
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 17 : 0))
+      abort ();
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 0 : 24))
+      abort ();
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 ? 51 : 12))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-cselim-1.c
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+typedef struct {
+  short a;
+  short b;
+} data;
+
+data in1[N], in2[N], out[N];
+short result[N*2] = {7,-7,9,-6,11,-5,13,-4,15,-3,17,-2,19,-1,21,0,23,1,25,2,27,3,29,4,31,5,33,6,35,7,37,8,39,9,41,10,43,11,45,12,47,13,49,14,51,15,53,16,55,17,57,18,59,19,61,20,63,21,65,22,67,23,69,24,71,25,73,26,75,27,77,28,79,29,81,30,83,31,85,32,87,33,89,34,91,35,93,36,95,37,97,38,99,39,101,40,103,41,105,42};
+short out1[N], out2[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+  short c, d;
+
+  /* Vectorizable with conditional store sinking.  */
+  for (i = 0; i < N; i++)
+    {
+      c = in1[i].b;
+      d = in2[i].b;
+
+      if (c >= d)
+        {
+          out[i].b = c;
+          out[i].a = d + 5;
+        }
+      else
+        {
+          out[i].b = d - 12;
+          out[i].a = c + d;
+        }
+    }
+
+  /* Not vectorizable.  */
+  for (i = 0; i < N; i++)
+    {
+      c = in1[i].b;
+      d = in2[i].b;
+
+      if (c >= d)
+        {
+          out1[i] = c;
+        }
+      else
+        {
+          out2[i] = c + d;
+        }
+    }
+}
+
+int
+main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in1[i].a = i;
+      in1[i].b = i + 2;
+      in2[i].a = 5;
+      in2[i].b = i + 5;
+      __asm__ volatile ("");
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    {
+      if (out[i].a != result[2*i] || out[i].b != result[2*i+1])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail { vect_no_align || { ! vect_strided2 } } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-cselim-2.c
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 50
+
+int a[N], b[N], in1[N], in2[N];
+int result[2*N] = {5,-7,7,-6,9,-5,11,-4,13,-3,15,-2,17,-1,19,0,21,1,23,2,25,3,27,4,29,5,31,6,33,7,35,8,37,9,39,10,41,11,43,12,45,13,47,14,49,15,51,16,53,17,55,18,57,19,59,20,61,21,63,22,65,23,67,24,69,25,71,26,73,27,75,28,77,29,79,30,81,31,83,32,85,33,87,34,89,35,91,36,93,37,95,38,97,39,99,40,101,41,103,42};
+
+__attribute__ ((noinline)) void
+foo (int *pa, int *pb)
+{
+  int i;
+  int c, d;
+
+  /* Store sinking should not work here since the pointers may alias.  */
+  for (i = 0; i < N; i++)
+    {
+      c = in1[i];
+      d = in2[i];
+
+      if (c >= d)
+        {
+          *pa = c;
+          *pb = d + 5;
+        }
+      else
+        {
+          *pb = d - 12;
+          *pa = c + d;
+        }
+
+      pa++;
+      pb++;
+    }
+}
+
+int
+main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in1[i] = i;
+      in2[i] = i + 5;
+      __asm__ volatile ("");
+    }
+
+  foo (a, b);
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != result[2*i] || b[i] != result[2*i+1])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect"  } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect.exp
+++ b/src/gcc/testsuite/gcc.dg/vect/vect.exp
@@ -75,15 +75,20 @@
 lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details"
 
 # Main loop.
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]]  \
-	"" $DEFAULT_VECTCFLAGS
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]]  \
-	"" $DEFAULT_VECTCFLAGS
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]]  \
-        "" $DEFAULT_VECTCFLAGS
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]]  \
-        "" $VECT_SLP_CFLAGS
-
+set VECT_ADDITIONAL_FLAGS [list ""]
+if { [check_effective_target_lto] } {
+    lappend VECT_ADDITIONAL_FLAGS "-flto"
+}
+foreach flags $VECT_ADDITIONAL_FLAGS {
+    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]]  \
+	$flags $DEFAULT_VECTCFLAGS
+    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]]  \
+	$flags $DEFAULT_VECTCFLAGS
+    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/slp-*.\[cS\]]]  \
+        $flags $DEFAULT_VECTCFLAGS
+    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/bb-slp*.\[cS\]]]  \
+        $flags $VECT_SLP_CFLAGS
+}
 
 #### Tests with special options
 global SAVED_DEFAULT_VECTCFLAGS
@@ -210,6 +215,12 @@
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/ggc-*.\[cS\]]]  \
         "" $DEFAULT_VECTCFLAGS
 
+# -ftree-loop-if-convert-stores
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-ftree-loop-if-convert-stores"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/if-cvt-stores-vect-*.\[cS\]]]  \
+        "" $DEFAULT_VECTCFLAGS
+
 # With -O3.
 # Don't allow IPA cloning, because it throws our counts out of whack.
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
@@ -234,6 +245,18 @@
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-reassoc-bb-slp-*.\[cS\]]]  \
         "" $VECT_SLP_CFLAGS
 
+# -fno-tree-fre
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-fre-*.\[cS\]]]  \
+        "" $DEFAULT_VECTCFLAGS
+
+# -fno-tree-fre -fno-tree-pre
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-tree-fre" "-fno-tree-pre"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fre-pre*.\[cS\]]]  \
+        "" $DEFAULT_VECTCFLAGS
+
 # Clean up.
 set dg-do-what-default ${save-dg-do-what-default}
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c
@@ -20,5 +20,6 @@
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1b.c
@@ -22,5 +22,6 @@
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-1.c
@@ -22,5 +22,6 @@
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-2b.c
@@ -37,5 +37,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-3a.c
@@ -49,5 +49,6 @@
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 3 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-3b.c
@@ -49,5 +49,6 @@
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-outer-5.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-outer-5.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_float } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include <signal.h>
@@ -17,7 +16,7 @@
   float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
   float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
   float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
-  float E[4] = {0,1,2,480};
+  float E[4] = {0,480,960,1440};
   float s;
 
   int i, j;
@@ -55,7 +54,7 @@
       s = 0;
       for (j=0; j<N; j+=4)
 	s += C[j];
-      B[i+3] = B[i] + s;
+      B[i+1] = B[i] + s;
     }
 
   /* check results:  */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg.  */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+  unsigned char *s = src;
+  unsigned short *d = (unsigned short *)dst;
+  int i;
+
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+      d++;
+    }
+
+  s = src;
+  d = (unsigned short *)dst;
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
+        abort ();
+      d++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  unsigned char in[N], out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-2.c
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg.  */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+  unsigned char *s = src;
+  int *d = (int *)dst;
+  int i;
+
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      *d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+      d++;
+    }
+
+  s = src;
+  d = (int *)dst;
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
+        abort ();
+      d++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  unsigned char in[N], out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* Final value stays in int, so no over-widening is detected at the moment.  */
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg.  */
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned char *dst)
+{
+  unsigned char *s = src;
+  unsigned short *d = (unsigned short *)dst;
+  int i;
+
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      *d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
+      d++;
+    }
+
+  s = src;
+  d = (unsigned short *)dst;
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
+        abort ();
+      d++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  unsigned char in[N], out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c
@@ -0,0 +1,68 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+/* Modified rgb to rgb conversion from FFmpeg.  */
+__attribute__ ((noinline)) int
+foo (unsigned char *src, unsigned char *dst)
+{
+  unsigned char *s = src;
+  unsigned short *d = (unsigned short *)dst, res;
+  int i, result = 0;
+
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
+      *d = res;
+      result += res;
+      d++;
+    }
+
+  s = src;
+  d = (unsigned short *)dst;
+  for (i = 0; i < N/4; i++)
+    {
+      const int b = *s++;
+      const int g = *s++;
+      const int r = *s++;
+      const int a = *s++;
+      if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
+        abort ();
+      d++;
+    }
+
+  return result;
+}
+
+int main (void)
+{
+  int i;
+  unsigned char in[N], out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
--- a/src/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
@@ -6,12 +6,12 @@
 #define N 128
 
 int ib[N+7];
+int ia[N+1];
 
 __attribute__ ((noinline))
 int main1 ()
 {
   int i;
-  int ia[N+1];
 
   /* Don't peel keeping one load and the store aligned.  */
   for (i = 0; i <= N; i++)
--- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
@@ -58,7 +58,8 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-shift-3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-shift-3.c
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned short dst[N] __attribute__((aligned(N)));
+unsigned short src[N] __attribute__((aligned(N)));
+
+__attribute__ ((noinline))
+void array_shift(void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    dst[i] = src[i] >> 3;
+}
+
+int main()
+{
+  volatile int i;
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    src[i] = i << 3;
+
+  array_shift ();
+
+  for (i = 0; i < N; i++)
+    if (dst[i] != i)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-shift-4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-shift-4.c
@@ -0,0 +1,37 @@
+/* { dg-require-effective-target vect_shift_char } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+#define N 32
+
+unsigned char dst[N] __attribute__((aligned(N)));
+unsigned char src[N] __attribute__((aligned(N)));
+
+__attribute__ ((noinline))
+void array_shift(void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    dst[i] = src[i] >> 3;
+}
+
+int main()
+{
+  volatile int i;
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    src[i] = i << 3;
+
+  array_shift ();
+
+  for (i = 0; i < N; i++)
+    if (dst[i] != i)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-mult.c
@@ -71,6 +71,6 @@
   return 0;
 }   
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i2.c
@@ -55,6 +55,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-i4.c
@@ -68,6 +68,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided4 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u16-mult.c
@@ -62,6 +62,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u32-mult.c
@@ -61,6 +61,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i2-gap.c
@@ -69,6 +69,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap2.c
@@ -76,6 +76,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-a-u8-i8-gap7.c
@@ -81,6 +81,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-float.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-float.c
@@ -39,7 +39,7 @@
 }
 
 /* Needs interleaving support.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd_wide } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave && vect_extract_even_odd_wide } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult.c
@@ -71,6 +71,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-mult-char-ls.c
@@ -71,6 +71,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-same-dr.c
@@ -72,5 +72,5 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
@@ -55,6 +55,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave || vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
@@ -65,8 +65,8 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  { target { vect_interleave && vect_pack_trunc  } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { { ! { vect_interleave } } && { vect_pack_trunc } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  { target { { vect_interleave || vect_strided4 } && vect_pack_trunc } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { { ! { vect_interleave || vect_strided4 } } && { vect_pack_trunc } } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c
@@ -39,7 +39,7 @@
 }
 
 /* Needs interleaving support.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave || vect_strided2 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave || vect_strided2 } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i2.c
@@ -55,6 +55,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i3.c
@@ -0,0 +1,112 @@
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+typedef struct {
+   unsigned short a;
+   unsigned short b;
+   unsigned short c;
+} s;
+
+#define A(I) (I)
+#define B(I) ((I) * 2)
+#define C(I) ((unsigned short) ~((I) ^ 0x18))
+
+void __attribute__ ((noinline))
+check1 (s *res)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    if (res[i].a != C (i)
+	|| res[i].b != A (i)
+	|| res[i].c != B (i))
+      abort ();
+}
+
+void __attribute__ ((noinline))
+check2 (unsigned short *res)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    if (res[i] != (unsigned short) (A (i) + B (i) + C (i)))
+      abort ();
+}
+
+void __attribute__ ((noinline))
+check3 (s *res)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    if (res[i].a != i
+	|| res[i].b != i
+	|| res[i].c != i)
+      abort ();
+}
+
+void __attribute__ ((noinline))
+check4 (unsigned short *res)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    if (res[i] != (unsigned short) (A (i) + B (i)))
+      abort ();
+}
+
+void __attribute__ ((noinline))
+main1 (s *arr)
+{
+  int i;
+  s *ptr = arr;
+  s res1[N];
+  unsigned short res2[N];
+
+  for (i = 0; i < N; i++)
+    {
+      res1[i].a = arr[i].c;
+      res1[i].b = arr[i].a;
+      res1[i].c = arr[i].b;
+    }
+  check1 (res1);
+
+  for (i = 0; i < N; i++)
+    res2[i] = arr[i].a + arr[i].b + arr[i].c;
+  check2 (res2);
+
+  for (i = 0; i < N; i++)
+    {
+      res1[i].a = i;
+      res1[i].b = i;
+      res1[i].c = i;
+    }
+  check3 (res1);
+
+  for (i = 0; i < N; i++)
+    res2[i] = arr[i].a + arr[i].b;
+  check4 (res2);
+}
+
+int main (void)
+{
+  int i;
+  s arr[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      arr[i].a = A (i);
+      arr[i].b = B (i);
+      arr[i].c = C (i);
+    }
+  main1 (arr);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  { target vect_strided3 } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u16-i4.c
@@ -68,6 +68,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided4 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i4.c
@@ -63,6 +63,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided4 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u32-i8.c
@@ -77,6 +77,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u32-mult.c
@@ -60,6 +60,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2.c
@@ -54,6 +54,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
    
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i2-gap.c
@@ -71,6 +71,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8.c
@@ -85,6 +85,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap2.c
@@ -78,6 +78,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
@@ -98,6 +98,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap7.c
@@ -83,6 +83,6 @@
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided8 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
   
--- a/src/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-vfa-03.c
@@ -53,6 +53,6 @@
 } 
 
 /* Needs interleaving support.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail  { vect_interleave && vect_extract_even_odd } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided2 } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { xfail  vect_strided2 } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+
+__attribute__ ((noinline)) void 
+foo (int *__restrict a,
+     short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * 2333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * 2333)
+      abort ();
+}
+
+__attribute__ ((noinline)) void
+bar (int *__restrict a,
+     short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * (short) 2333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * (short) 2333)
+      abort ();
+}
+
+int main (void)
+{
+  int i;
+  int a[N];
+  short b[N];
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 0;
+      b[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo (a, b, N);
+  bar (a, b, N);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
@@ -0,0 +1,77 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+
+__attribute__ ((noinline)) void 
+foo (unsigned int *__restrict a,
+     unsigned short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * 2333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * 2333)
+      abort ();
+}
+
+__attribute__ ((noinline)) void
+bar (unsigned int *__restrict a,
+     unsigned short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = (unsigned short) 2333 * b[i];
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * (unsigned short) 2333)
+      abort ();
+}
+
+__attribute__ ((noinline)) void
+baz (unsigned int *__restrict a,
+     unsigned short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * 233333333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * 233333333)
+      abort ();
+}
+
+
+int main (void)
+{
+  int i;
+  unsigned int a[N];
+  unsigned short b[N];
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 0;
+      b[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo (a, b, N);
+  bar (a, b, N);
+  baz (a, b, N);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+#define COEF 32470
+#define COEF2 324700
+
+unsigned char in[N];
+int out[N];
+int out2[N];
+
+__attribute__ ((noinline)) void
+foo (int a)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      out[i] = in[i] * COEF;
+      out2[i] = in[i] + a;
+    }
+}
+
+int main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo (COEF2);
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF || out2[i] != in[i] + COEF2)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+#define COEF 32470
+
+unsigned char in[N];
+int out[N];
+
+__attribute__ ((noinline)) void
+foo ()
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    out[i] = in[i] * COEF;
+}
+
+__attribute__ ((noinline)) void
+bar ()
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    out[i] = COEF * in[i];
+}
+
+int main (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo ();
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF)
+      abort ();
+
+  bar ();
+
+  for (i = 0; i < N; i++)
+    if (out[i] != in[i] * COEF)
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
@@ -9,13 +9,11 @@
 unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
 unsigned int result[N];
 
-/* short->int widening-mult */
+/* unsigned short->unsigned int widening-mult.  */
 __attribute__ ((noinline)) int
 foo1(int len) {
   int i;
 
-  /* Not vectorized because X[i] and Y[i] are casted to 'int'
-     so the widening multiplication pattern is not recognized.  */
   for (i=0; i<len; i++) {
     result[i] = (unsigned int)(X[i] * Y[i]);
   }
@@ -43,8 +41,8 @@
   return 0;
 }
 
-/*The induction loop is vectorized  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
@@ -9,7 +9,7 @@
 unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
 unsigned short result[N];
 
-/* char->short widening-mult */
+/* unsigned char-> unsigned short widening-mult.  */
 __attribute__ ((noinline)) int
 foo1(int len) {
   int i;
@@ -28,8 +28,7 @@
   for (i=0; i<N; i++) {
     X[i] = i;
     Y[i] = 64-i;
-    if (i%4 == 0)
-      X[i] = 5;
+    __asm__ volatile ("");
   }
 
   foo1 (N);
@@ -43,5 +42,7 @@
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s16.c
@@ -0,0 +1,107 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C 16
+
+__attribute__ ((noinline)) void
+foo (short *src, int *dst)
+{
+  int i;
+  short b, b0, b1, b2, b3, *s = src;
+  int *d = dst;
+
+  for (i = 0; i < N/4; i++)
+    {
+      b0 = *s++;
+      b1 = *s++;
+      b2 = *s++;
+      b3 = *s++;
+      *d = b0 << C;
+      d++;
+      *d = b1 << C;
+      d++;
+      *d = b2 << C;
+      d++;
+      *d = b3 << C;
+      d++;
+    }
+
+  s = src;
+  d = dst;
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      if (*d != b << C)
+        abort ();
+      d++;
+    }
+
+  s = src;
+  d = dst;
+  for (i = 0; i < N/4; i++)
+    {
+      b0 = *s++;
+      b1 = *s++;
+      b2 = *s++;
+      b3 = *s++;
+      *d = b0 << C;
+      d++;
+      *d = b1 << C;
+      d++;
+      *d = b2 << C;
+      d++;
+      *d = b3 << 6;
+      d++;
+    }
+
+  s = src;
+  d = dst;
+  for (i = 0; i < N/4; i++)
+    {
+      b = *s++;
+      if (*d != b << C)
+        abort ();
+      d++;
+      b = *s++;
+      if (*d != b << C)
+        abort ();
+      d++;
+      b = *s++;
+      if (*d != b << C)
+        abort ();
+      d++;
+      b = *s++;
+      if (*d != b << 6)
+        abort ();
+      d++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  short in[N];
+  int out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-s8.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C 12
+
+__attribute__ ((noinline)) void
+foo (char *src, int *dst)
+{
+  int i;
+  char b, *s = src;
+  int *d = dst;
+
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      *d = b << C;
+      d++;
+    }
+
+  s = src;
+  d = dst;
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      if (*d != b << C)
+        abort ();
+      d++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  char in[N];
+  int out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u16.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C 7
+
+__attribute__ ((noinline)) void
+foo (unsigned short *src, unsigned int *dst)
+{
+  int i;
+  unsigned short b, *s = src;
+  unsigned int *d = dst;
+
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      *d = b << C;
+      d++;
+    }
+
+  s = src;
+  d = dst;
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      if (*d != b << C)
+        abort ();
+      d++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  unsigned short in[N];
+  unsigned int out[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
--- a/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c
+++ b/src/gcc/testsuite/gcc.dg/vect/vect-widen-shift-u8.c
@@ -0,0 +1,64 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C1 10
+#define C2 5
+
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2)
+{
+  int i;
+  unsigned char b, *s = src;
+  unsigned int *d1 = dst1, *d2 = dst2;
+
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      *d1 = b << C1;
+      d1++;
+      *d2 = b << C2;
+      d2++;
+    }
+
+  s = src;
+  d1 = dst1;
+  d2 = dst2;
+  for (i = 0; i < N; i++)
+    {
+      b = *s++;
+      if (*d1 != b << C1 || *d2 != b << C2)
+        abort ();
+      d1++;
+      d2++;
+    }
+}
+
+int main (void)
+{
+  int i;
+  unsigned char in[N];
+  unsigned int out1[N];
+  unsigned int out2[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      in[i] = i;
+      out1[i] = 255;
+      out2[i] = 255;
+      __asm__ volatile ("");
+    }
+
+  foo (in, out1, out2);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/abitest.h
@@ -1,3 +1,4 @@
+
 #define IN_FRAMEWORK
 
 #ifdef VFP
@@ -10,6 +11,13 @@
 #define D6	48
 #define D7	56
 
+#ifdef NEON
+#define Q0      D0
+#define Q1      D2
+#define Q2      D4
+#define Q3      D6
+#endif
+
 #define S0	64
 #define S1	68
 #define S2	72
@@ -27,23 +35,18 @@
 #define S14	120
 #define S15	124
 
-#define R0	128
-#define R1	132
-#define R2	136
-#define R3	140
-
-#define STACK	144
-
+#define CORE_REG_START 128
 #else
+#define CORE_REG_START 0
+#endif
 
-#define R0	0
-#define R1	4
-#define R2	8
-#define R3	12
+#define R0	CORE_REG_START
+#define R1	(R0 + 4)
+#define R2	(R1 + 4)
+#define R3	(R2 + 4)
+#define STACK	(R3 + 4)
 
-#define STACK   16
 
-#endif
 
 extern void abort (void);
 
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-constants.h
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-constants.h
@@ -0,0 +1,33 @@
+
+
+#include "arm_neon.h"
+
+const int32x4_t i32x4_constvec1 = { 1101, 1102, 1103, 1104};
+const int32x4_t i32x4_constvec2 = { 2101, 2102, 2103, 2104};
+
+#define ELEM(INDEX) .val[INDEX]
+
+const int32x4x2_t i32x4x2_constvec1 =   {ELEM(0) = {0xaddebccb,11,12,13}, 
+					 ELEM(1) = {14, 15, 16, 17} };
+
+const int32x4x2_t i32x4x2_constvec2 = { ELEM(0) = {0xaadebcca,11,12,13}, 
+			                ELEM(1) = {140, 15, 16, 17}};
+
+const int32x4x3_t i32x4x3_constvec1 = { ELEM(0) = {0xabbccdde,8, 9, 10},
+					ELEM(1) = {0xabcccdde, 26, 27, 28},
+			                ELEM(2) = {0xaccccddf, 29, 30, 31}};
+
+const int32x4x3_t i32x4x3_constvec2 = { ELEM(0) = {0xbccccdd0,8, 9, 10},
+					ELEM(1) = {0xbdfe1000, 26, 27, 28},
+			                ELEM(2) = {0xaccccddf, 29, 30, 31}};
+const float32x4x2_t f32x4x2_constvec1 =
+  { ELEM(0) = { 7.101f, 0.201f, 0.301f, 0.401f} ,
+    ELEM(1) = { 8.101f, 0.501f, 0.601f, 0.701f} };
+
+const float32x4x2_t f32x4x2_constvec2 = 
+  { ELEM(0) = { 11.99f , 11.21f, 1.27f, 8.74f},
+    ELEM(1) = { 13.45f , 1.23f ,1.24f, 1.26f}};
+
+const int32x2_t i32x2_constvec1 = { 1283, 1345 };
+const int32x2x2_t i32x2x2_constvec1 = { ELEM(0) = { 0xabcdefab, 32 },
+					ELEM(1) = { 0xabcdefbc, 33 }};
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect1.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect1.c
@@ -0,0 +1,27 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok  } */
+/* { dg-require-effective-target arm_neon_ok  } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect1.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */
+ARG(float, 3.0f, S4) /* D2, Q1 */
+ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */
+ARG(double, 12.0, D3) /* Backfill this particular argument.  */
+ARG(int32x4x2_t, i32x4x2_constvec2, STACK) 
+ARG(float, 5.0f, STACK+sizeof(int32x4x2_t)) /* No backfill allowed.  */
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect2.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect2.c
@@ -0,0 +1,23 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect2.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1.  */
+ARG(float, 3.0f, S4) /* D2, Q1 occupied.  */
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect3.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect3.c
@@ -0,0 +1,26 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect3.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */
+ARG(float, 3.0f, S4) /* D2, Q1 */
+ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */
+ARG(int32x4x2_t, i32x4x2_constvec2, STACK) 
+ARG(double, 11.0, STACK+sizeof(int32x4x2_t)) /* No backfill in D3.  */
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect4.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect4.c
@@ -0,0 +1,27 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok  } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect4.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */
+ARG(float, 3.0f, S4) /* D2, Q1 */
+ARG(int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */
+ARG(double, 12.0, D3) /* Backfill this particular argument.  */
+ARG(float, 5.0f, S5) /* Backfill in S5.  */
+ARG(int32x4x2_t, i32x4x2_constvec2, STACK) 
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect5.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect5.c
@@ -0,0 +1,28 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect5.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */
+ARG(float, 3.0f, S4) /* D2, Q1 */
+ARG(float32x4x2_t, f32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12 */
+ARG(double, 12.0, D3) /* Backfill this particular argument.  */
+ARG(int32x4x2_t, i32x4x2_constvec2, STACK) 
+ARG(float, 5.0f, STACK+sizeof(int32x4x2_t)) /* No backfill allowed.  */
+LAST_ARG(int, 3, R0)
+
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect6.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect6.c
@@ -0,0 +1,24 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect6.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(int32x4_t, i32x4_constvec2, Q0) /* D0, D1 */
+ARG(int32x4x3_t, i32x4x3_constvec1, Q1) /* Q1, Q2, Q3  */
+ARG(int32x4x3_t, i32x4x3_constvec2, STACK)
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect7.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect7.c
@@ -0,0 +1,27 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect7.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(float, 24.3f, S0) /* S0 , D0, Q0 */
+ARG(int32x4x3_t, i32x4x3_constvec1, Q1) /* Q1, Q2, Q3  */
+ARG(double, 25.6, D1)
+ARG(float, 12.67f, S1)
+ARG(int32x4x3_t, i32x4x3_constvec2, STACK)
+ARG(double, 2.47, STACK+sizeof(int32x4x3_t))
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect8.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect8.c
@@ -0,0 +1,27 @@
+/* Test AAPCS layout (VFP variant for Neon types) */
+
+/* { dg-do run { target arm*-*-*eabi* } } */
+/* { dg-require-effective-target arm_hard_vfp_ok } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-add-options arm_neon } */
+
+
+#ifndef IN_FRAMEWORK
+#define VFP
+#define NEON
+#define TESTFILE "neon-vect8.c"
+#include "neon-constants.h"
+
+
+#include "abitest.h"
+#else
+
+ARG(float, 24.3f, S0) /* S0 , D0, Q0 */
+ARG(int32x2_t, i32x2_constvec1, D1) /* D1  */
+ARG(double, 25.6, D2)
+ARG(float, 12.67f, S1)
+ARG(int32x4x3_t, i32x4x3_constvec2, STACK)
+ARG(double, 2.47, STACK+sizeof(int32x4x3_t))
+LAST_ARG(int, 3, R0)
+#endif
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp10.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp10.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp11.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp11.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp12.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp12.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp13.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp13.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp14.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp14.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp15.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp15.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp16.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp17.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp17.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp1.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp1.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp2.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp2.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp3.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp3.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp4.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp4.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp5.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp5.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp6.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp6.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp7.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp7.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp8.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/aapcs/vfp9.c
+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp9.c
@@ -1,6 +1,6 @@
 /* Test AAPCS layout (VFP variant) */
 
-/* { dg-do run { target arm*-*-eabi* } } */
+/* { dg-do run { target arm*-*-*eabi* } } */
 /* { dg-require-effective-target arm_hard_vfp_ok } */
 /* { dg-require-effective-target arm32 } */
 /* { dg-options "-O -mfpu=vfp -mfloat-abi=hard" } */
--- a/src/gcc/testsuite/gcc.target/arm/cmp-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/cmp-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
+/* { dg-final { scan-assembler-not "__aeabi" } } */
+int x, y;
+
+#define TEST_EXPR(NAME, ARGS, EXPR)			\
+  int NAME##1 ARGS { return (EXPR); }			\
+  int NAME##2 ARGS { return !(EXPR); }			\
+  int NAME##3 ARGS { return (EXPR) ? x : y; }		\
+  void NAME##4 ARGS { if (EXPR) x++; }			\
+  void NAME##5 ARGS { if (!(EXPR)) x++; }
+
+#define TEST(NAME, TYPE, OPERATOR) \
+  TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), a1 OPERATOR a2)	\
+  TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), a1 OPERATOR *a2)	\
+  TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), *a1 OPERATOR a2)	\
+  TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), *a1 OPERATOR *a2) \
+  TEST_EXPR (NAME##_rc, (TYPE a1), a1 OPERATOR 100)		\
+  TEST_EXPR (NAME##_cr, (TYPE a1), 100 OPERATOR a1)
+
+#define TEST_OP(NAME, OPERATOR) \
+  TEST (sc_##NAME, signed char, OPERATOR)		\
+  TEST (uc_##NAME, unsigned char, OPERATOR)		\
+  TEST (ss_##NAME, short, OPERATOR)			\
+  TEST (us_##NAME, unsigned short, OPERATOR)		\
+  TEST (si_##NAME, int, OPERATOR)			\
+  TEST (ui_##NAME, unsigned int, OPERATOR)		\
+  TEST (sll_##NAME, long long, OPERATOR)		\
+  TEST (ull_##NAME, unsigned long long, OPERATOR)
+
+TEST_OP (eq, ==)
+TEST_OP (ne, !=)
+TEST_OP (lt, <)
+TEST_OP (gt, >)
+TEST_OP (le, <=)
+TEST_OP (ge, >=)
--- a/src/gcc/testsuite/gcc.target/arm/cmp-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/cmp-2.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_vfp_ok } */
+/* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
+/* { dg-options "-O -mfpu=vfp -mfloat-abi=softfp" } */
+/* { dg-final { scan-assembler-not "\tbl\t" } } */
+/* { dg-final { scan-assembler-not "__aeabi" } } */
+int x, y;
+
+#define EQ(X, Y) ((X) == (Y))
+#define NE(X, Y) ((X) != (Y))
+#define LT(X, Y) ((X) < (Y))
+#define GT(X, Y) ((X) > (Y))
+#define LE(X, Y) ((X) <= (Y))
+#define GE(X, Y) ((X) >= (Y))
+
+#define TEST_EXPR(NAME, ARGS, EXPR)			\
+  int NAME##1 ARGS { return (EXPR); }			\
+  int NAME##2 ARGS { return !(EXPR); }			\
+  int NAME##3 ARGS { return (EXPR) ? x : y; }		\
+  void NAME##4 ARGS { if (EXPR) x++; }			\
+  void NAME##5 ARGS { if (!(EXPR)) x++; }
+
+#define TEST(NAME, TYPE, OPERATOR) \
+  TEST_EXPR (NAME##_rr, (TYPE a1, TYPE a2), OPERATOR (a1, a2))		\
+  TEST_EXPR (NAME##_rm, (TYPE a1, TYPE *a2), OPERATOR (a1, *a2))	\
+  TEST_EXPR (NAME##_mr, (TYPE *a1, TYPE a2), OPERATOR (*a1, a2))	\
+  TEST_EXPR (NAME##_mm, (TYPE *a1, TYPE *a2), OPERATOR (*a1, *a2))	\
+  TEST_EXPR (NAME##_rc, (TYPE a1), OPERATOR (a1, 100))			\
+  TEST_EXPR (NAME##_cr, (TYPE a1), OPERATOR (100, a1))
+
+#define TEST_OP(NAME, OPERATOR) \
+  TEST (f_##NAME, float, OPERATOR)		\
+  TEST (d_##NAME, double, OPERATOR)		\
+  TEST (ld_##NAME, long double, OPERATOR)
+
+TEST_OP (eq, EQ)
+TEST_OP (ne, NE)
+TEST_OP (lt, LT)
+TEST_OP (gt, GT)
+TEST_OP (le, LE)
+TEST_OP (ge, GE)
+TEST_OP (blt, __builtin_isless)
+TEST_OP (bgt, __builtin_isgreater)
+TEST_OP (ble, __builtin_islessequal)
+TEST_OP (bge, __builtin_isgreaterequal)
+/* This one should be expanded into separate ordered and equality
+   comparisons.  */
+TEST_OP (blg, __builtin_islessgreater)
+TEST_OP (bun, __builtin_isunordered)
--- a/src/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c
+++ b/src/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withhelpers.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v5_ok } */
+/* { dg-options "-std=gnu99" } */
+/* { dg-add-options arm_arch_v5 } */
+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "file included" "In file included" { target *-*-* } 0 } */
+
+#include "../../gcc.dg/di-longlong64-sync-1.c"
+
+/* On an old ARM we have no ldrexd or strexd so we have to use helpers.  */
+/* { dg-final { scan-assembler-not "ldrexd" } } */
+/* { dg-final { scan-assembler-not "strexd" } } */
+/* { dg-final { scan-assembler "__sync_" } } */
--- a/src/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c
+++ b/src/gcc/testsuite/gcc.target/arm/di-longlong64-sync-withldrexd.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-options "-marm -std=gnu99" } */
+/* { dg-require-effective-target arm_arch_v6k_ok } */
+/* { dg-add-options arm_arch_v6k } */
+/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "note: '__sync_nand_and_fetch' changed semantics in GCC 4.4" "" { target *-*-* } 0 } */
+/* { dg-message "file included" "In file included" { target *-*-* } 0 } */
+
+#include "../../gcc.dg/di-longlong64-sync-1.c"
+
+/* We should be using ldrexd, strexd and no helpers or shorter ldrex.  */
+/* { dg-final { scan-assembler-times "\tldrexd" 48 } } */
+/* { dg-final { scan-assembler-times "\tstrexd" 48 } } */
+/* { dg-final { scan-assembler-not "__sync_" } } */
+/* { dg-final { scan-assembler-not "ldrex\t" } } */
+/* { dg-final { scan-assembler-not "strex\t" } } */
--- a/src/gcc/testsuite/gcc.target/arm/lp1013209.c
+++ b/src/gcc/testsuite/gcc.target/arm/lp1013209.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+#include "arm_neon.h"
+
+void foo (void)
+{
+  int16_t buffer[2048];
+  int f;
+  for (f = 0; f < 128; f += 8)
+    vst1q_u16 (&buffer[f], (uint16x8_t){0});
+
+}
--- a/src/gcc/testsuite/gcc.target/arm/mla-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/mla-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long foolong (long long x, short *a, short *b)
+{
+    return x + *a * *b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c
@@ -15,5 +15,5 @@
   out_float32x2_t = vld1_dup_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c
@@ -15,5 +15,5 @@
   out_poly16x4_t = vld1_dup_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c
@@ -15,5 +15,5 @@
   out_poly8x8_t = vld1_dup_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c
@@ -15,5 +15,5 @@
   out_int16x4_t = vld1_dup_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c
@@ -15,5 +15,5 @@
   out_int32x2_t = vld1_dup_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c
@@ -15,5 +15,5 @@
   out_int64x1_t = vld1_dup_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c
@@ -15,5 +15,5 @@
   out_int8x8_t = vld1_dup_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c
@@ -15,5 +15,5 @@
   out_uint16x4_t = vld1_dup_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c
@@ -15,5 +15,5 @@
   out_uint32x2_t = vld1_dup_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c
@@ -15,5 +15,5 @@
   out_uint64x1_t = vld1_dup_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c
@@ -15,5 +15,5 @@
   out_uint8x8_t = vld1_dup_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c
@@ -15,5 +15,5 @@
   out_float32x2_t = vld1_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x2_t = vld1_lane_f32 (0, arg1_float32x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x4_t = vld1_lane_p16 (0, arg1_poly16x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c
@@ -16,5 +16,5 @@
   out_poly8x8_t = vld1_lane_p8 (0, arg1_poly8x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x4_t = vld1_lane_s16 (0, arg1_int16x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x2_t = vld1_lane_s32 (0, arg1_int32x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c
@@ -16,5 +16,5 @@
   out_int64x1_t = vld1_lane_s64 (0, arg1_int64x1_t, 0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c
@@ -16,5 +16,5 @@
   out_int8x8_t = vld1_lane_s8 (0, arg1_int8x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x4_t = vld1_lane_u16 (0, arg1_uint16x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x2_t = vld1_lane_u32 (0, arg1_uint32x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c
@@ -16,5 +16,5 @@
   out_uint64x1_t = vld1_lane_u64 (0, arg1_uint64x1_t, 0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c
@@ -16,5 +16,5 @@
   out_uint8x8_t = vld1_lane_u8 (0, arg1_uint8x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c
@@ -15,5 +15,5 @@
   out_poly16x4_t = vld1_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c
@@ -15,5 +15,5 @@
   out_poly8x8_t = vld1_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c
@@ -15,5 +15,5 @@
   out_float32x4_t = vld1q_dup_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c
@@ -15,5 +15,5 @@
   out_poly16x8_t = vld1q_dup_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c
@@ -15,5 +15,5 @@
   out_poly8x16_t = vld1q_dup_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c
@@ -15,5 +15,5 @@
   out_int16x8_t = vld1q_dup_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c
@@ -15,5 +15,5 @@
   out_int32x4_t = vld1q_dup_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c
@@ -15,5 +15,5 @@
   out_int64x2_t = vld1q_dup_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c
@@ -15,5 +15,5 @@
   out_int8x16_t = vld1q_dup_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c
@@ -15,5 +15,5 @@
   out_uint16x8_t = vld1q_dup_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c
@@ -15,5 +15,5 @@
   out_uint32x4_t = vld1q_dup_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c
@@ -15,5 +15,5 @@
   out_uint64x2_t = vld1q_dup_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c
@@ -15,5 +15,5 @@
   out_uint8x16_t = vld1q_dup_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c
@@ -15,5 +15,5 @@
   out_float32x4_t = vld1q_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x4_t = vld1q_lane_f32 (0, arg1_float32x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x8_t = vld1q_lane_p16 (0, arg1_poly16x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c
@@ -16,5 +16,5 @@
   out_poly8x16_t = vld1q_lane_p8 (0, arg1_poly8x16_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x8_t = vld1q_lane_s16 (0, arg1_int16x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x4_t = vld1q_lane_s32 (0, arg1_int32x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c
@@ -16,5 +16,5 @@
   out_int64x2_t = vld1q_lane_s64 (0, arg1_int64x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c
@@ -16,5 +16,5 @@
   out_int8x16_t = vld1q_lane_s8 (0, arg1_int8x16_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x8_t = vld1q_lane_u16 (0, arg1_uint16x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x4_t = vld1q_lane_u32 (0, arg1_uint32x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c
@@ -16,5 +16,5 @@
   out_uint64x2_t = vld1q_lane_u64 (0, arg1_uint64x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c
@@ -16,5 +16,5 @@
   out_uint8x16_t = vld1q_lane_u8 (0, arg1_uint8x16_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c
@@ -15,5 +15,5 @@
   out_poly16x8_t = vld1q_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c
@@ -15,5 +15,5 @@
   out_poly8x16_t = vld1q_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c
@@ -15,5 +15,5 @@
   out_int16x8_t = vld1q_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c
@@ -15,5 +15,5 @@
   out_int32x4_t = vld1q_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c
@@ -15,5 +15,5 @@
   out_int64x2_t = vld1q_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c
@@ -15,5 +15,5 @@
   out_int8x16_t = vld1q_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c
@@ -15,5 +15,5 @@
   out_uint16x8_t = vld1q_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c
@@ -15,5 +15,5 @@
   out_uint32x4_t = vld1q_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c
@@ -15,5 +15,5 @@
   out_uint64x2_t = vld1q_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c
@@ -15,5 +15,5 @@
   out_uint8x16_t = vld1q_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c
@@ -15,5 +15,5 @@
   out_int16x4_t = vld1_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c
@@ -15,5 +15,5 @@
   out_int32x2_t = vld1_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c
@@ -15,5 +15,5 @@
   out_int64x1_t = vld1_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c
@@ -15,5 +15,5 @@
   out_int8x8_t = vld1_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c
@@ -15,5 +15,5 @@
   out_uint16x4_t = vld1_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c
@@ -15,5 +15,5 @@
   out_uint32x2_t = vld1_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c
@@ -15,5 +15,5 @@
   out_uint64x1_t = vld1_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c
@@ -15,5 +15,5 @@
   out_uint8x8_t = vld1_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c
@@ -15,5 +15,5 @@
   out_float32x2x2_t = vld2_dup_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c
@@ -15,5 +15,5 @@
   out_poly16x4x2_t = vld2_dup_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c
@@ -15,5 +15,5 @@
   out_poly8x8x2_t = vld2_dup_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c
@@ -15,5 +15,5 @@
   out_int16x4x2_t = vld2_dup_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c
@@ -15,5 +15,5 @@
   out_int32x2x2_t = vld2_dup_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c
@@ -15,5 +15,5 @@
   out_int64x1x2_t = vld2_dup_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c
@@ -15,5 +15,5 @@
   out_int8x8x2_t = vld2_dup_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c
@@ -15,5 +15,5 @@
   out_uint16x4x2_t = vld2_dup_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c
@@ -15,5 +15,5 @@
   out_uint32x2x2_t = vld2_dup_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c
@@ -15,5 +15,5 @@
   out_uint64x1x2_t = vld2_dup_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c
@@ -15,5 +15,5 @@
   out_uint8x8x2_t = vld2_dup_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c
@@ -15,5 +15,5 @@
   out_float32x2x2_t = vld2_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x2x2_t = vld2_lane_f32 (0, arg1_float32x2x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x4x2_t = vld2_lane_p16 (0, arg1_poly16x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c
@@ -16,5 +16,5 @@
   out_poly8x8x2_t = vld2_lane_p8 (0, arg1_poly8x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x4x2_t = vld2_lane_s16 (0, arg1_int16x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x2x2_t = vld2_lane_s32 (0, arg1_int32x2x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c
@@ -16,5 +16,5 @@
   out_int8x8x2_t = vld2_lane_s8 (0, arg1_int8x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x4x2_t = vld2_lane_u16 (0, arg1_uint16x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x2x2_t = vld2_lane_u32 (0, arg1_uint32x2x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c
@@ -16,5 +16,5 @@
   out_uint8x8x2_t = vld2_lane_u8 (0, arg1_uint8x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c
@@ -15,5 +15,5 @@
   out_poly16x4x2_t = vld2_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c
@@ -15,5 +15,5 @@
   out_poly8x8x2_t = vld2_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c
@@ -15,6 +15,6 @@
   out_float32x4x2_t = vld2q_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x4x2_t = vld2q_lane_f32 (0, arg1_float32x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x8x2_t = vld2q_lane_p16 (0, arg1_poly16x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x8x2_t = vld2q_lane_s16 (0, arg1_int16x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x4x2_t = vld2q_lane_s32 (0, arg1_int32x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x8x2_t = vld2q_lane_u16 (0, arg1_uint16x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x4x2_t = vld2q_lane_u32 (0, arg1_uint32x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c
@@ -15,6 +15,6 @@
   out_poly16x8x2_t = vld2q_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c
@@ -15,6 +15,6 @@
   out_poly8x16x2_t = vld2q_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c
@@ -15,6 +15,6 @@
   out_int16x8x2_t = vld2q_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c
@@ -15,6 +15,6 @@
   out_int32x4x2_t = vld2q_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c
@@ -15,6 +15,6 @@
   out_int8x16x2_t = vld2q_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c
@@ -15,6 +15,6 @@
   out_uint16x8x2_t = vld2q_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c
@@ -15,6 +15,6 @@
   out_uint32x4x2_t = vld2q_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c
@@ -15,6 +15,6 @@
   out_uint8x16x2_t = vld2q_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2s16.c
@@ -15,5 +15,5 @@
   out_int16x4x2_t = vld2_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2s32.c
@@ -15,5 +15,5 @@
   out_int32x2x2_t = vld2_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2s64.c
@@ -15,5 +15,5 @@
   out_int64x1x2_t = vld2_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2s8.c
@@ -15,5 +15,5 @@
   out_int8x8x2_t = vld2_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2u16.c
@@ -15,5 +15,5 @@
   out_uint16x4x2_t = vld2_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2u32.c
@@ -15,5 +15,5 @@
   out_uint32x2x2_t = vld2_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2u64.c
@@ -15,5 +15,5 @@
   out_uint64x1x2_t = vld2_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2u8.c
@@ -15,5 +15,5 @@
   out_uint8x8x2_t = vld2_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c
@@ -15,5 +15,5 @@
   out_float32x2x3_t = vld3_dup_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c
@@ -15,5 +15,5 @@
   out_poly16x4x3_t = vld3_dup_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c
@@ -15,5 +15,5 @@
   out_poly8x8x3_t = vld3_dup_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c
@@ -15,5 +15,5 @@
   out_int16x4x3_t = vld3_dup_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c
@@ -15,5 +15,5 @@
   out_int32x2x3_t = vld3_dup_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c
@@ -15,5 +15,5 @@
   out_int64x1x3_t = vld3_dup_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c
@@ -15,5 +15,5 @@
   out_int8x8x3_t = vld3_dup_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c
@@ -15,5 +15,5 @@
   out_uint16x4x3_t = vld3_dup_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c
@@ -15,5 +15,5 @@
   out_uint32x2x3_t = vld3_dup_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c
@@ -15,5 +15,5 @@
   out_uint64x1x3_t = vld3_dup_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c
@@ -15,5 +15,5 @@
   out_uint8x8x3_t = vld3_dup_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c
@@ -15,5 +15,5 @@
   out_float32x2x3_t = vld3_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x2x3_t = vld3_lane_f32 (0, arg1_float32x2x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x4x3_t = vld3_lane_p16 (0, arg1_poly16x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c
@@ -16,5 +16,5 @@
   out_poly8x8x3_t = vld3_lane_p8 (0, arg1_poly8x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x4x3_t = vld3_lane_s16 (0, arg1_int16x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x2x3_t = vld3_lane_s32 (0, arg1_int32x2x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c
@@ -16,5 +16,5 @@
   out_int8x8x3_t = vld3_lane_s8 (0, arg1_int8x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x4x3_t = vld3_lane_u16 (0, arg1_uint16x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x2x3_t = vld3_lane_u32 (0, arg1_uint32x2x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c
@@ -16,5 +16,5 @@
   out_uint8x8x3_t = vld3_lane_u8 (0, arg1_uint8x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3p16.c
@@ -15,5 +15,5 @@
   out_poly16x4x3_t = vld3_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3p8.c
@@ -15,5 +15,5 @@
   out_poly8x8x3_t = vld3_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c
@@ -15,6 +15,6 @@
   out_float32x4x3_t = vld3q_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x4x3_t = vld3q_lane_f32 (0, arg1_float32x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x8x3_t = vld3q_lane_p16 (0, arg1_poly16x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x8x3_t = vld3q_lane_s16 (0, arg1_int16x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x4x3_t = vld3q_lane_s32 (0, arg1_int32x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x8x3_t = vld3q_lane_u16 (0, arg1_uint16x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x4x3_t = vld3q_lane_u32 (0, arg1_uint32x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c
@@ -15,6 +15,6 @@
   out_poly16x8x3_t = vld3q_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c
@@ -15,6 +15,6 @@
   out_poly8x16x3_t = vld3q_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c
@@ -15,6 +15,6 @@
   out_int16x8x3_t = vld3q_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c
@@ -15,6 +15,6 @@
   out_int32x4x3_t = vld3q_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c
@@ -15,6 +15,6 @@
   out_int8x16x3_t = vld3q_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c
@@ -15,6 +15,6 @@
   out_uint16x8x3_t = vld3q_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c
@@ -15,6 +15,6 @@
   out_uint32x4x3_t = vld3q_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c
@@ -15,6 +15,6 @@
   out_uint8x16x3_t = vld3q_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c
@@ -15,5 +15,5 @@
   out_int16x4x3_t = vld3_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c
@@ -15,5 +15,5 @@
   out_int32x2x3_t = vld3_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c
@@ -15,5 +15,5 @@
   out_int64x1x3_t = vld3_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c
@@ -15,5 +15,5 @@
   out_int8x8x3_t = vld3_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c
@@ -15,5 +15,5 @@
   out_uint16x4x3_t = vld3_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c
@@ -15,5 +15,5 @@
   out_uint32x2x3_t = vld3_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c
@@ -15,5 +15,5 @@
   out_uint64x1x3_t = vld3_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c
@@ -15,5 +15,5 @@
   out_uint8x8x3_t = vld3_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c
@@ -15,5 +15,5 @@
   out_float32x2x4_t = vld4_dup_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c
@@ -15,5 +15,5 @@
   out_poly16x4x4_t = vld4_dup_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c
@@ -15,5 +15,5 @@
   out_poly8x8x4_t = vld4_dup_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c
@@ -15,5 +15,5 @@
   out_int16x4x4_t = vld4_dup_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c
@@ -15,5 +15,5 @@
   out_int32x2x4_t = vld4_dup_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c
@@ -15,5 +15,5 @@
   out_int64x1x4_t = vld4_dup_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c
@@ -15,5 +15,5 @@
   out_int8x8x4_t = vld4_dup_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c
@@ -15,5 +15,5 @@
   out_uint16x4x4_t = vld4_dup_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c
@@ -15,5 +15,5 @@
   out_uint32x2x4_t = vld4_dup_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c
@@ -15,5 +15,5 @@
   out_uint64x1x4_t = vld4_dup_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c
@@ -15,5 +15,5 @@
   out_uint8x8x4_t = vld4_dup_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c
@@ -15,5 +15,5 @@
   out_float32x2x4_t = vld4_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x2x4_t = vld4_lane_f32 (0, arg1_float32x2x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x4x4_t = vld4_lane_p16 (0, arg1_poly16x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c
@@ -16,5 +16,5 @@
   out_poly8x8x4_t = vld4_lane_p8 (0, arg1_poly8x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x4x4_t = vld4_lane_s16 (0, arg1_int16x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x2x4_t = vld4_lane_s32 (0, arg1_int32x2x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c
@@ -16,5 +16,5 @@
   out_int8x8x4_t = vld4_lane_s8 (0, arg1_int8x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x4x4_t = vld4_lane_u16 (0, arg1_uint16x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x2x4_t = vld4_lane_u32 (0, arg1_uint32x2x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c
@@ -16,5 +16,5 @@
   out_uint8x8x4_t = vld4_lane_u8 (0, arg1_uint8x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4p16.c
@@ -15,5 +15,5 @@
   out_poly16x4x4_t = vld4_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4p8.c
@@ -15,5 +15,5 @@
   out_poly8x8x4_t = vld4_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c
@@ -15,6 +15,6 @@
   out_float32x4x4_t = vld4q_f32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c
@@ -16,5 +16,5 @@
   out_float32x4x4_t = vld4q_lane_f32 (0, arg1_float32x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c
@@ -16,5 +16,5 @@
   out_poly16x8x4_t = vld4q_lane_p16 (0, arg1_poly16x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c
@@ -16,5 +16,5 @@
   out_int16x8x4_t = vld4q_lane_s16 (0, arg1_int16x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c
@@ -16,5 +16,5 @@
   out_int32x4x4_t = vld4q_lane_s32 (0, arg1_int32x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c
@@ -16,5 +16,5 @@
   out_uint16x8x4_t = vld4q_lane_u16 (0, arg1_uint16x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c
@@ -16,5 +16,5 @@
   out_uint32x4x4_t = vld4q_lane_u32 (0, arg1_uint32x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c
@@ -15,6 +15,6 @@
   out_poly16x8x4_t = vld4q_p16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c
@@ -15,6 +15,6 @@
   out_poly8x16x4_t = vld4q_p8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c
@@ -15,6 +15,6 @@
   out_int16x8x4_t = vld4q_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c
@@ -15,6 +15,6 @@
   out_int32x4x4_t = vld4q_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c
@@ -15,6 +15,6 @@
   out_int8x16x4_t = vld4q_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c
@@ -15,6 +15,6 @@
   out_uint16x8x4_t = vld4q_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c
@@ -15,6 +15,6 @@
   out_uint32x4x4_t = vld4q_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c
@@ -15,6 +15,6 @@
   out_uint8x16x4_t = vld4q_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4s16.c
@@ -15,5 +15,5 @@
   out_int16x4x4_t = vld4_s16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4s32.c
@@ -15,5 +15,5 @@
   out_int32x2x4_t = vld4_s32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4s64.c
@@ -15,5 +15,5 @@
   out_int64x1x4_t = vld4_s64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4s8.c
@@ -15,5 +15,5 @@
   out_int8x8x4_t = vld4_s8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4u16.c
@@ -15,5 +15,5 @@
   out_uint16x4x4_t = vld4_u16 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4u32.c
@@ -15,5 +15,5 @@
   out_uint32x2x4_t = vld4_u32 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4u64.c
@@ -15,5 +15,5 @@
   out_uint64x1x4_t = vld4_u64 (0);
 }
 
-/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4u8.c
@@ -15,5 +15,5 @@
   out_uint8x8x4_t = vld4_u8 (0);
 }
 
-/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vld4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c
@@ -16,5 +16,5 @@
   vst1_f32 (arg0_float32_t, arg1_float32x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c
@@ -16,5 +16,5 @@
   vst1_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c
@@ -16,5 +16,5 @@
   vst1_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c
@@ -16,5 +16,5 @@
   vst1_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c
@@ -16,5 +16,5 @@
   vst1_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c
@@ -16,5 +16,5 @@
   vst1_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c
@@ -16,5 +16,5 @@
   vst1_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c
@@ -16,5 +16,5 @@
   vst1_lane_s8 (arg0_int8_t, arg1_int8x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c
@@ -16,5 +16,5 @@
   vst1_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c
@@ -16,5 +16,5 @@
   vst1_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c
@@ -16,5 +16,5 @@
   vst1_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c
@@ -16,5 +16,5 @@
   vst1_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c
@@ -16,5 +16,5 @@
   vst1_p16 (arg0_poly16_t, arg1_poly16x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c
@@ -16,5 +16,5 @@
   vst1_p8 (arg0_poly8_t, arg1_poly8x8_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c
@@ -16,5 +16,5 @@
   vst1q_f32 (arg0_float32_t, arg1_float32x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c
@@ -16,5 +16,5 @@
   vst1q_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c
@@ -16,5 +16,5 @@
   vst1q_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c
@@ -16,5 +16,5 @@
   vst1q_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c
@@ -16,5 +16,5 @@
   vst1q_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c
@@ -16,5 +16,5 @@
   vst1q_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c
@@ -16,5 +16,5 @@
   vst1q_lane_s64 (arg0_int64_t, arg1_int64x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c
@@ -16,5 +16,5 @@
   vst1q_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c
@@ -16,5 +16,5 @@
   vst1q_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c
@@ -16,5 +16,5 @@
   vst1q_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c
@@ -16,5 +16,5 @@
   vst1q_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c
@@ -16,5 +16,5 @@
   vst1q_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c
@@ -16,5 +16,5 @@
   vst1q_p16 (arg0_poly16_t, arg1_poly16x8_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c
@@ -16,5 +16,5 @@
   vst1q_p8 (arg0_poly8_t, arg1_poly8x16_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c
@@ -16,5 +16,5 @@
   vst1q_s16 (arg0_int16_t, arg1_int16x8_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c
@@ -16,5 +16,5 @@
   vst1q_s32 (arg0_int32_t, arg1_int32x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c
@@ -16,5 +16,5 @@
   vst1q_s64 (arg0_int64_t, arg1_int64x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c
@@ -16,5 +16,5 @@
   vst1q_s8 (arg0_int8_t, arg1_int8x16_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c
@@ -16,5 +16,5 @@
   vst1q_u16 (arg0_uint16_t, arg1_uint16x8_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c
@@ -16,5 +16,5 @@
   vst1q_u32 (arg0_uint32_t, arg1_uint32x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c
@@ -16,5 +16,5 @@
   vst1q_u64 (arg0_uint64_t, arg1_uint64x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c
@@ -16,5 +16,5 @@
   vst1q_u8 (arg0_uint8_t, arg1_uint8x16_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1s16.c
@@ -16,5 +16,5 @@
   vst1_s16 (arg0_int16_t, arg1_int16x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1s32.c
@@ -16,5 +16,5 @@
   vst1_s32 (arg0_int32_t, arg1_int32x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1s64.c
@@ -16,5 +16,5 @@
   vst1_s64 (arg0_int64_t, arg1_int64x1_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1s8.c
@@ -16,5 +16,5 @@
   vst1_s8 (arg0_int8_t, arg1_int8x8_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1u16.c
@@ -16,5 +16,5 @@
   vst1_u16 (arg0_uint16_t, arg1_uint16x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.16\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1u32.c
@@ -16,5 +16,5 @@
   vst1_u32 (arg0_uint32_t, arg1_uint32x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.32\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1u64.c
@@ -16,5 +16,5 @@
   vst1_u64 (arg0_uint64_t, arg1_uint64x1_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1u8.c
@@ -16,5 +16,5 @@
   vst1_u8 (arg0_uint8_t, arg1_uint8x8_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.8\[ 	\]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c
@@ -16,5 +16,5 @@
   vst2_f32 (arg0_float32_t, arg1_float32x2x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c
@@ -16,5 +16,5 @@
   vst2_lane_f32 (arg0_float32_t, arg1_float32x2x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c
@@ -16,5 +16,5 @@
   vst2_lane_p16 (arg0_poly16_t, arg1_poly16x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c
@@ -16,5 +16,5 @@
   vst2_lane_p8 (arg0_poly8_t, arg1_poly8x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c
@@ -16,5 +16,5 @@
   vst2_lane_s16 (arg0_int16_t, arg1_int16x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c
@@ -16,5 +16,5 @@
   vst2_lane_s32 (arg0_int32_t, arg1_int32x2x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c
@@ -16,5 +16,5 @@
   vst2_lane_s8 (arg0_int8_t, arg1_int8x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c
@@ -16,5 +16,5 @@
   vst2_lane_u16 (arg0_uint16_t, arg1_uint16x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c
@@ -16,5 +16,5 @@
   vst2_lane_u32 (arg0_uint32_t, arg1_uint32x2x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c
@@ -16,5 +16,5 @@
   vst2_lane_u8 (arg0_uint8_t, arg1_uint8x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2p16.c
@@ -16,5 +16,5 @@
   vst2_p16 (arg0_poly16_t, arg1_poly16x4x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2p8.c
@@ -16,5 +16,5 @@
   vst2_p8 (arg0_poly8_t, arg1_poly8x8x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c
@@ -16,6 +16,6 @@
   vst2q_f32 (arg0_float32_t, arg1_float32x4x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c
@@ -16,5 +16,5 @@
   vst2q_lane_f32 (arg0_float32_t, arg1_float32x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c
@@ -16,5 +16,5 @@
   vst2q_lane_p16 (arg0_poly16_t, arg1_poly16x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c
@@ -16,5 +16,5 @@
   vst2q_lane_s16 (arg0_int16_t, arg1_int16x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c
@@ -16,5 +16,5 @@
   vst2q_lane_s32 (arg0_int32_t, arg1_int32x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c
@@ -16,5 +16,5 @@
   vst2q_lane_u16 (arg0_uint16_t, arg1_uint16x8x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c
@@ -16,5 +16,5 @@
   vst2q_lane_u32 (arg0_uint32_t, arg1_uint32x4x2_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c
@@ -16,6 +16,6 @@
   vst2q_p16 (arg0_poly16_t, arg1_poly16x8x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c
@@ -16,6 +16,6 @@
   vst2q_p8 (arg0_poly8_t, arg1_poly8x16x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c
@@ -16,6 +16,6 @@
   vst2q_s16 (arg0_int16_t, arg1_int16x8x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c
@@ -16,6 +16,6 @@
   vst2q_s32 (arg0_int32_t, arg1_int32x4x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c
@@ -16,6 +16,6 @@
   vst2q_s8 (arg0_int8_t, arg1_int8x16x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c
@@ -16,6 +16,6 @@
   vst2q_u16 (arg0_uint16_t, arg1_uint16x8x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c
@@ -16,6 +16,6 @@
   vst2q_u32 (arg0_uint32_t, arg1_uint32x4x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c
@@ -16,6 +16,6 @@
   vst2q_u8 (arg0_uint8_t, arg1_uint8x16x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c
@@ -16,5 +16,5 @@
   vst2_s16 (arg0_int16_t, arg1_int16x4x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c
@@ -16,5 +16,5 @@
   vst2_s32 (arg0_int32_t, arg1_int32x2x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c
@@ -16,5 +16,5 @@
   vst2_s64 (arg0_int64_t, arg1_int64x1x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c
@@ -16,5 +16,5 @@
   vst2_s8 (arg0_int8_t, arg1_int8x8x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c
@@ -16,5 +16,5 @@
   vst2_u16 (arg0_uint16_t, arg1_uint16x4x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c
@@ -16,5 +16,5 @@
   vst2_u32 (arg0_uint32_t, arg1_uint32x2x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c
@@ -16,5 +16,5 @@
   vst2_u64 (arg0_uint64_t, arg1_uint64x1x2_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c
@@ -16,5 +16,5 @@
   vst2_u8 (arg0_uint8_t, arg1_uint8x8x2_t);
 }
 
-/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst2\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c
@@ -16,5 +16,5 @@
   vst3_f32 (arg0_float32_t, arg1_float32x2x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c
@@ -16,5 +16,5 @@
   vst3_lane_f32 (arg0_float32_t, arg1_float32x2x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c
@@ -16,5 +16,5 @@
   vst3_lane_p16 (arg0_poly16_t, arg1_poly16x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c
@@ -16,5 +16,5 @@
   vst3_lane_p8 (arg0_poly8_t, arg1_poly8x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c
@@ -16,5 +16,5 @@
   vst3_lane_s16 (arg0_int16_t, arg1_int16x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c
@@ -16,5 +16,5 @@
   vst3_lane_s32 (arg0_int32_t, arg1_int32x2x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c
@@ -16,5 +16,5 @@
   vst3_lane_s8 (arg0_int8_t, arg1_int8x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c
@@ -16,5 +16,5 @@
   vst3_lane_u16 (arg0_uint16_t, arg1_uint16x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c
@@ -16,5 +16,5 @@
   vst3_lane_u32 (arg0_uint32_t, arg1_uint32x2x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c
@@ -16,5 +16,5 @@
   vst3_lane_u8 (arg0_uint8_t, arg1_uint8x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c
@@ -16,5 +16,5 @@
   vst3_p16 (arg0_poly16_t, arg1_poly16x4x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c
@@ -16,5 +16,5 @@
   vst3_p8 (arg0_poly8_t, arg1_poly8x8x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c
@@ -16,6 +16,6 @@
   vst3q_f32 (arg0_float32_t, arg1_float32x4x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c
@@ -16,5 +16,5 @@
   vst3q_lane_f32 (arg0_float32_t, arg1_float32x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c
@@ -16,5 +16,5 @@
   vst3q_lane_p16 (arg0_poly16_t, arg1_poly16x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c
@@ -16,5 +16,5 @@
   vst3q_lane_s16 (arg0_int16_t, arg1_int16x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c
@@ -16,5 +16,5 @@
   vst3q_lane_s32 (arg0_int32_t, arg1_int32x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c
@@ -16,5 +16,5 @@
   vst3q_lane_u16 (arg0_uint16_t, arg1_uint16x8x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c
@@ -16,5 +16,5 @@
   vst3q_lane_u32 (arg0_uint32_t, arg1_uint32x4x3_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c
@@ -16,6 +16,6 @@
   vst3q_p16 (arg0_poly16_t, arg1_poly16x8x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c
@@ -16,6 +16,6 @@
   vst3q_p8 (arg0_poly8_t, arg1_poly8x16x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c
@@ -16,6 +16,6 @@
   vst3q_s16 (arg0_int16_t, arg1_int16x8x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c
@@ -16,6 +16,6 @@
   vst3q_s32 (arg0_int32_t, arg1_int32x4x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c
@@ -16,6 +16,6 @@
   vst3q_s8 (arg0_int8_t, arg1_int8x16x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c
@@ -16,6 +16,6 @@
   vst3q_u16 (arg0_uint16_t, arg1_uint16x8x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c
@@ -16,6 +16,6 @@
   vst3q_u32 (arg0_uint32_t, arg1_uint32x4x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c
@@ -16,6 +16,6 @@
   vst3q_u8 (arg0_uint8_t, arg1_uint8x16x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c
@@ -16,5 +16,5 @@
   vst3_s16 (arg0_int16_t, arg1_int16x4x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c
@@ -16,5 +16,5 @@
   vst3_s32 (arg0_int32_t, arg1_int32x2x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c
@@ -16,5 +16,5 @@
   vst3_s64 (arg0_int64_t, arg1_int64x1x3_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c
@@ -16,5 +16,5 @@
   vst3_s8 (arg0_int8_t, arg1_int8x8x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c
@@ -16,5 +16,5 @@
   vst3_u16 (arg0_uint16_t, arg1_uint16x4x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c
@@ -16,5 +16,5 @@
   vst3_u32 (arg0_uint32_t, arg1_uint32x2x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c
@@ -16,5 +16,5 @@
   vst3_u64 (arg0_uint64_t, arg1_uint64x1x3_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c
@@ -16,5 +16,5 @@
   vst3_u8 (arg0_uint8_t, arg1_uint8x8x3_t);
 }
 
-/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst3\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c
@@ -16,5 +16,5 @@
   vst4_f32 (arg0_float32_t, arg1_float32x2x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c
@@ -16,5 +16,5 @@
   vst4_lane_f32 (arg0_float32_t, arg1_float32x2x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c
@@ -16,5 +16,5 @@
   vst4_lane_p16 (arg0_poly16_t, arg1_poly16x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c
@@ -16,5 +16,5 @@
   vst4_lane_p8 (arg0_poly8_t, arg1_poly8x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c
@@ -16,5 +16,5 @@
   vst4_lane_s16 (arg0_int16_t, arg1_int16x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c
@@ -16,5 +16,5 @@
   vst4_lane_s32 (arg0_int32_t, arg1_int32x2x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c
@@ -16,5 +16,5 @@
   vst4_lane_s8 (arg0_int8_t, arg1_int8x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c
@@ -16,5 +16,5 @@
   vst4_lane_u16 (arg0_uint16_t, arg1_uint16x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c
@@ -16,5 +16,5 @@
   vst4_lane_u32 (arg0_uint32_t, arg1_uint32x2x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c
@@ -16,5 +16,5 @@
   vst4_lane_u8 (arg0_uint8_t, arg1_uint8x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c
@@ -16,5 +16,5 @@
   vst4_p16 (arg0_poly16_t, arg1_poly16x4x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c
@@ -16,5 +16,5 @@
   vst4_p8 (arg0_poly8_t, arg1_poly8x8x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c
@@ -16,6 +16,6 @@
   vst4q_f32 (arg0_float32_t, arg1_float32x4x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c
@@ -16,5 +16,5 @@
   vst4q_lane_f32 (arg0_float32_t, arg1_float32x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c
@@ -16,5 +16,5 @@
   vst4q_lane_p16 (arg0_poly16_t, arg1_poly16x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c
@@ -16,5 +16,5 @@
   vst4q_lane_s16 (arg0_int16_t, arg1_int16x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c
@@ -16,5 +16,5 @@
   vst4q_lane_s32 (arg0_int32_t, arg1_int32x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c
@@ -16,5 +16,5 @@
   vst4q_lane_u16 (arg0_uint16_t, arg1_uint16x8x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c
@@ -16,5 +16,5 @@
   vst4q_lane_u32 (arg0_uint32_t, arg1_uint32x4x4_t, 1);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c
@@ -16,6 +16,6 @@
   vst4q_p16 (arg0_poly16_t, arg1_poly16x8x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c
@@ -16,6 +16,6 @@
   vst4q_p8 (arg0_poly8_t, arg1_poly8x16x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c
@@ -16,6 +16,6 @@
   vst4q_s16 (arg0_int16_t, arg1_int16x8x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c
@@ -16,6 +16,6 @@
   vst4q_s32 (arg0_int32_t, arg1_int32x4x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c
@@ -16,6 +16,6 @@
   vst4q_s8 (arg0_int8_t, arg1_int8x16x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c
@@ -16,6 +16,6 @@
   vst4q_u16 (arg0_uint16_t, arg1_uint16x8x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c
@@ -16,6 +16,6 @@
   vst4q_u32 (arg0_uint32_t, arg1_uint32x4x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c
@@ -16,6 +16,6 @@
   vst4q_u8 (arg0_uint8_t, arg1_uint8x16x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c
@@ -16,5 +16,5 @@
   vst4_s16 (arg0_int16_t, arg1_int16x4x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c
@@ -16,5 +16,5 @@
   vst4_s32 (arg0_int32_t, arg1_int32x2x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c
@@ -16,5 +16,5 @@
   vst4_s64 (arg0_int64_t, arg1_int64x1x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c
@@ -16,5 +16,5 @@
   vst4_s8 (arg0_int8_t, arg1_int8x8x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c
@@ -16,5 +16,5 @@
   vst4_u16 (arg0_uint16_t, arg1_uint16x4x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.16\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c
@@ -16,5 +16,5 @@
   vst4_u32 (arg0_uint32_t, arg1_uint32x2x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.32\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c
@@ -16,5 +16,5 @@
   vst4_u64 (arg0_uint64_t, arg1_uint64x1x4_t);
 }
 
-/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst1\.64\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c
@@ -16,5 +16,5 @@
   vst4_u8 (arg0_uint8_t, arg1_uint8x8x4_t);
 }
 
-/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
+/* { dg-final { scan-assembler "vst4\.8\[ 	\]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ 	\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-combine-sub-abs-into-vabd.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -funsafe-math-optimizations" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+float32x2_t f_sub_abs_to_vabd_32()
+{
+  float32x2_t val1 = vdup_n_f32 (10);
+  float32x2_t val2 = vdup_n_f32 (30);
+  float32x2_t sres = vsub_f32(val1, val2);
+  float32x2_t res = vabs_f32 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.f32" } }*/
+
+#include <arm_neon.h>
+int8x8_t sub_abs_to_vabd_8()
+{
+  int8x8_t val1 = vdup_n_s8 (10);
+  int8x8_t val2 = vdup_n_s8 (30);
+  int8x8_t sres = vsub_s8(val1, val2);
+  int8x8_t res = vabs_s8 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.s8" } }*/
+
+int16x4_t sub_abs_to_vabd_16()
+{
+  int16x4_t val1 = vdup_n_s16 (10);
+  int16x4_t val2 = vdup_n_s16 (30);
+  int16x4_t sres = vsub_s16(val1, val2);
+  int16x4_t res = vabs_s16 (sres);
+
+  return res;
+}
+/* { dg-final { scan-assembler "vabd\.s16" } }*/
+
+int32x2_t sub_abs_to_vabd_32()
+{
+  int32x2_t val1 = vdup_n_s32 (10);
+  int32x2_t val2 = vdup_n_s32 (30);
+  int32x2_t sres = vsub_s32(val1, val2);
+  int32x2_t res = vabs_s32 (sres);
+
+   return res;
+}
+/* { dg-final { scan-assembler "vabd\.s32" } }*/
--- a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O1" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+#define SETUP(A) x##A = vld3_u32 (ptr + A * 0x20)
+#define MODIFY(A) x##A = vld3_lane_u32 (ptr + A * 0x20 + 0x10, x##A, 1)
+#define STORE(A) vst3_u32 (ptr + A * 0x20, x##A)
+
+#define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5)
+
+void
+bar (uint32_t *ptr, int y)
+{
+  uint32x2x3_t MANY (SETUP);
+  int *x = __builtin_alloca (y);
+  int z[0x1000];
+  foo (x, z);
+  MANY (MODIFY);
+  foo (x, z);
+  MANY (STORE);
+}
--- a/src/gcc/testsuite/gcc.target/arm/neon-modes-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-3.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+void f1 (volatile float32x4_t *dest, volatile float32x4x4_t *src, int n)
+{
+  float32x4x4_t a5, a6, a7, a8, a9;
+  int i;
+
+  a5 = *src;
+  a6 = *src;
+  a7 = *src;
+  a8 = *src;
+  a9 = *src;
+  while (n--)
+    {
+      for (i = 0; i < 8; i++)
+	{
+	  float32x4x4_t a0, a1, a2, a3, a4;
+
+	  a0 = *src;
+	  a1 = *src;
+	  a2 = *src;
+	  a3 = *src;
+	  a4 = *src;
+	  *src = a0;
+	  *dest = a0.val[0];
+	  *dest = a0.val[3];
+	  *src = a1;
+	  *dest = a1.val[0];
+	  *dest = a1.val[3];
+	  *src = a2;
+	  *dest = a2.val[0];
+	  *dest = a2.val[3];
+	  *src = a3;
+	  *dest = a3.val[0];
+	  *dest = a3.val[3];
+	  *src = a4;
+	  *dest = a4.val[0];
+	  *dest = a4.val[3];
+	}
+      *src = a5;
+      *dest = a5.val[0];
+      *dest = a5.val[3];
+      *src = a6;
+      *dest = a6.val[0];
+      *dest = a6.val[3];
+      *src = a7;
+      *dest = a7.val[0];
+      *dest = a7.val[3];
+      *src = a8;
+      *dest = a8.val[0];
+      *dest = a8.val[3];
+      *src = a9;
+      *dest = a9.val[0];
+      *dest = a9.val[3];
+    }
+}
--- a/src/gcc/testsuite/gcc.target/arm/neon-vld3-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-vld3-1.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+uint32_t buffer[12];
+
+void __attribute__((noinline))
+foo (uint32_t *a)
+{
+  uint32x4x3_t x;
+
+  x = vld3q_u32 (a);
+  x.val[0] = vaddq_u32 (x.val[0], x.val[1]);
+  vst3q_u32 (a, x);
+}
+
+int
+main (void)
+{
+  buffer[0] = 1;
+  buffer[1] = 2;
+  foo (buffer);
+  return buffer[0] != 3;
+}
--- a/src/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-vlshr-imm-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshr\.u32.*#3" } } */
+
+/* Verify that VSHR immediate is used.  */
+void f1(int n, unsigned int x[], unsigned int y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = x[i] >> 3;
+}
--- a/src/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-vorn-vbic.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-add-options arm_neon } */
+
+void bor (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
+{
+  int i;
+  for (i = 0; i < 9; i++)
+    c[i] = b[i] | (~a[i]);
+}
+void bic (int *__restrict__ c, int *__restrict__ a, int *__restrict__ b)
+{
+  int i;
+  for (i = 0; i < 9; i++)
+    c[i] = b[i] & (~a[i]);
+}
+
+/* { dg-final { scan-assembler "vorn\\t" } } */
+/* { dg-final { scan-assembler "vbic\\t" } } */
--- a/src/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-vshl-imm-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshl\.i32.*#3" } } */
+
+/* Verify that VSHR immediate is used.  */
+void f1(int n, int x[], int y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = x[i] << 3;
+}
--- a/src/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-vshr-imm-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize" } */
+/* { dg-final { scan-assembler "vshr\.s32.*#3" } } */
+
+/* Verify that VSHR immediate is used.  */
+void f1(int n, int x[], int y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = x[i] >> 3;
+}
--- a/src/gcc/testsuite/gcc.target/arm/neon-vst3-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/neon-vst3-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+
+uint32_t buffer[64];
+
+void __attribute__((noinline))
+foo (uint32_t *a)
+{
+  uint32x4x3_t x;
+
+  x = vld3q_u32 (a);
+  a[35] = 1;
+  vst3q_lane_u32 (a + 32, x, 1);
+}
+
+int
+main (void)
+{
+  foo (buffer);
+  return buffer[35] != 1;
+}
--- a/src/gcc/testsuite/gcc.target/arm/no-wmla-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/no-wmla-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int a, short b, short c)
+{
+     int bc = b * c;
+        return a + (short)bc;
+}
+
+/* { dg-final { scan-assembler "mul" } } */
--- a/src/gcc/testsuite/gcc.target/arm/pr46329.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr46329.c
@@ -0,0 +1,9 @@
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+int __attribute__ ((vector_size (32))) x;
+void
+foo (void)
+{
+  x <<= x;
+}
--- a/src/gcc/testsuite/gcc.target/arm/pr48183.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr48183.c
@@ -0,0 +1,25 @@
+/* testsuite/gcc.target/arm/pr48183.c */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O -g" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+void move_16bit_to_32bit (int32_t *dst, const short *src, unsigned n)
+{
+    unsigned i;
+    int16x4x2_t input;
+    int32x4x2_t mid;
+    int32x4x2_t output;
+
+    for (i = 0; i < n/2; i += 8) {
+        input = vld2_s16(src + i);
+        mid.val[0] = vmovl_s16(input.val[0]);
+        mid.val[1] = vmovl_s16(input.val[1]);
+        output.val[0] = vshlq_n_s32(mid.val[0], 8);
+        output.val[1] = vshlq_n_s32(mid.val[1], 8);
+        vst2q_s32((int32_t *)dst + i, output);
+    }
+}
--- a/src/gcc/testsuite/gcc.target/arm/pr50099.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr50099.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long foo (signed char * arg)
+{
+  long long temp_1;
+
+  temp_1 = arg[256]; 
+  return temp_1;
+}
--- a/src/gcc/testsuite/gcc.target/arm/pr50305.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr50305.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-skip-if "incompatible options" { arm*-*-* } { "-march=*" } { "-march=armv7-a" } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -marm -march=armv7-a -mfpu=vfp3" } */
+
+struct event {
+ unsigned long long id;
+ unsigned int flag;
+};
+
+void dummy(void)
+{
+  /* This is here to ensure that the offset of perf_event_id below
+     relative to the LANCHOR symbol exceeds the allowed displacement.  */
+  static int __warned[300];
+ __warned[0] = 1;
+}
+
+extern void *kmem_cache_alloc_trace (void *cachep);
+extern void *cs_cachep;
+extern int nr_cpu_ids;
+
+struct event *
+event_alloc (int cpu)
+{
+ static unsigned long long __attribute__((aligned(8))) perf_event_id;
+ struct event *event;
+ unsigned long long result;
+ unsigned long tmp;
+
+ if (cpu >= nr_cpu_ids)
+  return 0;
+
+ event = kmem_cache_alloc_trace (cs_cachep);
+
+ __asm__ __volatile__ ("dmb" : : : "memory");
+
+ __asm__ __volatile__("@ atomic64_add_return\n"
+"1:	ldrexd	%0, %H0, [%3]\n"
+"	adds	%0, %0, %4\n"
+"	adc	%H0, %H0, %H4\n"
+"	strexd	%1, %0, %H0, [%3]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+ : "=&r" (result), "=&r" (tmp), "+Qo" (perf_event_id)
+ : "r" (&perf_event_id), "r" (1LL)
+ : "cc");
+
+ __asm__ __volatile__ ("dmb" : : : "memory");
+
+ event->id = result;
+
+ if (cpu)
+  event->flag = 1;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+   kmem_cache_alloc_trace (cs_cachep);
+
+ return event;
+}
+
--- a/src/gcc/testsuite/gcc.target/arm/pr50318-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr50318-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_dsp } */
+
+long long test (unsigned int sec, unsigned long long nsecs)
+{
+   return (long long)(long)sec * 1000000000L + (long long)(unsigned
+   long)nsecs;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/pr52633.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr52633.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/52633 */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-march=armv7-a -mfloat-abi=softfp -mfpu=neon -O -ftree-vectorize" } */
+
+void
+test (unsigned short *x, signed char *y)
+{
+  int i;
+  for (i = 0; i < 32; i++)
+    x[i] = (short) (y[i] << 5);
+}
+
--- a/src/gcc/testsuite/gcc.target/arm/pr52686.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr52686.c
@@ -0,0 +1,19 @@
+/* PR target/52375 */
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-march=armv7-a -mfloat-abi=softfp -mfpu=neon -O -ftree-vectorize" } */
+
+unsigned int output[4];
+
+void test (unsigned short *p)
+{
+  unsigned int x = *p;
+  if (x)
+    {
+      output[0] = x << 1;
+      output[1] = x << 1;
+      output[2] = x << 1;
+      output[3] = x << 1;
+    }
+}
+
--- a/src/gcc/testsuite/gcc.target/arm/pr53636.c
+++ b/src/gcc/testsuite/gcc.target/arm/pr53636.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O -ftree-vectorize" } */
+/* { dg-add-options arm_neon } */
+
+void fill (short *buf) __attribute__ ((noinline));
+void fill (short *buf)
+{
+  int i;
+
+  for (i = 0; i < 11 * 8; i++)
+    buf[i] = i;
+}
+
+void test (unsigned char *dst) __attribute__ ((noinline));
+void test (unsigned char *dst)
+{
+  short tmp[11 * 8], *tptr;
+  int i;
+
+  fill (tmp);
+
+  tptr = tmp;
+  for (i = 0; i < 8; i++)
+    {
+      dst[0] = (-tptr[0] + 9 * tptr[0 + 1] + 9 * tptr[0 + 2] - tptr[0 + 3]) >> 7;
+      dst[1] = (-tptr[1] + 9 * tptr[1 + 1] + 9 * tptr[1 + 2] - tptr[1 + 3]) >> 7;
+      dst[2] = (-tptr[2] + 9 * tptr[2 + 1] + 9 * tptr[2 + 2] - tptr[2 + 3]) >> 7;
+      dst[3] = (-tptr[3] + 9 * tptr[3 + 1] + 9 * tptr[3 + 2] - tptr[3 + 3]) >> 7;
+      dst[4] = (-tptr[4] + 9 * tptr[4 + 1] + 9 * tptr[4 + 2] - tptr[4 + 3]) >> 7;
+      dst[5] = (-tptr[5] + 9 * tptr[5 + 1] + 9 * tptr[5 + 2] - tptr[5 + 3]) >> 7;
+      dst[6] = (-tptr[6] + 9 * tptr[6 + 1] + 9 * tptr[6 + 2] - tptr[6 + 3]) >> 7;
+      dst[7] = (-tptr[7] + 9 * tptr[7 + 1] + 9 * tptr[7 + 2] - tptr[7 + 3]) >> 7;
+
+      dst += 8;
+      tptr += 11;
+    }
+}
+
+int main (void)
+{
+  char buf [8 * 8];
+
+  test (buf);
+
+  return 0;
+}
+
--- a/src/gcc/testsuite/gcc.target/arm/sat-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/sat-1.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-require-effective-target arm_arch_v6_ok } */
+/* { dg-options "-O2 -marm" } */
+/* { dg-add-options arm_arch_v6 } */
+
+
+static inline int sat1 (int a, int amin, int amax)
+{
+  if      (a < amin) return amin;
+  else if (a > amax) return amax;
+  else               return a;
+}
+
+static inline int sat2 (int a, int amin, int amax)
+{
+  if      (a > amax) return amax;
+  else if (a < amin) return amin;
+  else               return a;
+}
+
+int u1 (int x)
+{
+  return sat1 (x, 0, 63);
+}
+
+int us1 (int x)
+{
+  return sat1 (x >> 5, 0, 63);
+}
+
+int s1 (int x)
+{
+  return sat1 (x, -64, 63);
+}
+
+int ss1 (int x)
+{
+  return sat1 (x >> 5, -64, 63);
+}
+
+int u2 (int x)
+{
+  return sat2 (x, 0, 63);
+}
+
+int us2 (int x)
+{
+  return sat2 (x >> 5, 0, 63);
+}
+
+int s2 (int x)
+{
+  return sat2 (x, -64, 63);
+}
+
+int ss2 (int x)
+{
+  return sat2 (x >> 5, -64, 63);
+}
+
+/* { dg-final { scan-assembler-times "usat" 4 } } */
+/* { dg-final { scan-assembler-times "ssat" 4 } } */
+
--- a/src/gcc/testsuite/gcc.target/arm/shiftable.c
+++ b/src/gcc/testsuite/gcc.target/arm/shiftable.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm32 } */
+
+/* ARM has shift-and-alu insns.  Depending on the ALU op GCC represents some
+   of these as a left shift, others as a multiply.  Check that we match the
+    right one.  */
+
+int
+plus (int a, int b)
+{
+  return (a * 64) + b;
+}
+
+/* { dg-final { scan-assembler "add.*\[al]sl #6" } } */
+
+int
+minus (int a, int b)
+{
+  return a - (b * 64);
+}
+
+/* { dg-final { scan-assembler "sub.*\[al]sl #6" } } */
+
+int
+ior (int a, int b)
+{
+  return (a * 64) | b;
+}
+
+/* { dg-final { scan-assembler "orr.*\[al]sl #6" } } */
+
+int
+xor (int a, int b)
+{
+  return (a * 64) ^ b;
+}
+
+/* { dg-final { scan-assembler "eor.*\[al]sl #6" } } */
+
+int
+and (int a, int b)
+{
+  return (a * 64) & b;
+}
+
+/* { dg-final { scan-assembler "and.*\[al]sl #6" } } */
+
+int
+rsb (int a, int b)
+{
+  return (a * 64) - b;
+}
+
+/* { dg-final { scan-assembler "rsb.*\[al]sl #6" } } */
+
+int
+mvn (int a, int b)
+{
+  return ~(a * 64);
+}
+
+/* { dg-final { scan-assembler "mvn.*\[al]sl #6" } } */
--- a/src/gcc/testsuite/gcc.target/arm/smlaltb-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/smlaltb-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long int
+foo (long long x, int in)
+{
+  short a = in & 0xffff;
+  short b = (in & 0xffff0000) >> 16;
+
+  return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlaltb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/smlaltt-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/smlaltt-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long int
+foo (long long x, int in1, int in2)
+{
+  short a = (in1 & 0xffff0000) >> 16;
+  short b = (in2 & 0xffff0000) >> 16;
+
+  return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlaltt" } } */
--- a/src/gcc/testsuite/gcc.target/arm/smlatb-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/smlatb-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int x, int in)
+{
+  short a = in & 0xffff;
+  short b = (in & 0xffff0000) >> 16;
+
+  return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlatb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/smlatt-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/smlatt-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int x, int in1, int in2)
+{
+  short a = (in1 & 0xffff0000) >> 16;
+  short b = (in2 & 0xffff0000) >> 16;
+
+  return x + b * a;
+}
+
+/* { dg-final { scan-assembler "smlatt" } } */
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c
@@ -0,0 +1,13 @@
+/* Use conditional compare */
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpne" } } */
+
+int f(int i, int j) 
+{
+  if ( (i == '+') || (j == '-') ) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c
@@ -0,0 +1,13 @@
+/* Use conditional compare */                                                                                         
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpeq" } } */
+
+int f(int i, int j) 
+{
+  if ( (i == '+') && (j == '-') ) {
+    return 1;
+  } else {
+    return 0;
+  }
+}
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c
@@ -0,0 +1,12 @@
+/* Use conditional compare */                                                                                         
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpgt" } } */
+
+int f(int i, int j)
+{
+  if ( (i >= '+') ? (j > '-') : 0)
+    return 1;
+  else
+    return 0;
+}
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c
@@ -0,0 +1,12 @@
+/* Use conditional compare */                                                                                         
+/* { dg-options "-O2" } */
+/* { dg-skip-if "" { arm_thumb1_ok } } */
+/* { dg-final { scan-assembler "cmpgt" } } */
+
+int f(int i, int j)
+{
+  if ( (i >= '+') ? (j <= '-') : 1)
+    return 1;
+  else
+    return 0;
+}
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c
@@ -0,0 +1,27 @@
+/* Ensure simple replicated constant immediates work.  */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+  return a + 0xfefefefe;
+}
+
+/* { dg-final { scan-assembler "add.*#-16843010" } } */
+
+int
+foo2 (int a)
+{
+  return a - 0xab00ab00;
+}
+
+/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
+
+int
+foo3 (int a)
+{
+  return a & 0x00cd00cd;
+}
+
+/* { dg-final { scan-assembler "and.*#13435085" } } */
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c
@@ -0,0 +1,75 @@
+/* Ensure split constants can use replicated patterns.  */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+  return a + 0xfe00fe01;
+}
+
+/* { dg-final { scan-assembler "add.*#-33489408" } } */
+/* { dg-final { scan-assembler "add.*#1" } } */
+
+int
+foo2 (int a)
+{
+  return a + 0xdd01dd00;
+}
+
+/* { dg-final { scan-assembler "add.*#-587145984" } } */
+/* { dg-final { scan-assembler "add.*#65536" } } */
+
+int
+foo3 (int a)
+{
+  return a + 0x00443344;
+}
+
+/* { dg-final { scan-assembler "add.*#4456516" } } */
+/* { dg-final { scan-assembler "add.*#13056" } } */
+
+int
+foo4 (int a)
+{
+  return a + 0x77330033;
+}
+
+/* { dg-final { scan-assembler "add.*#1996488704" } } */
+/* { dg-final { scan-assembler "add.*#3342387" } } */
+
+int
+foo5 (int a)
+{
+  return a + 0x11221122;
+}
+
+/* { dg-final { scan-assembler "add.*#285217024" } } */
+/* { dg-final { scan-assembler "add.*#2228258" } } */
+
+int
+foo6 (int a)
+{
+  return a + 0x66666677;
+}
+
+/* { dg-final { scan-assembler "add.*#1717986918" } } */
+/* { dg-final { scan-assembler "add.*#17" } } */
+
+int
+foo7 (int a)
+{
+  return a + 0x99888888;
+}
+
+/* { dg-final { scan-assembler "add.*#-2004318072" } } */
+/* { dg-final { scan-assembler "add.*#285212672" } } */
+
+int
+foo8 (int a)
+{
+  return a + 0xdddddfff;
+}
+
+/* { dg-final { scan-assembler "add.*#-572662307" } } */
+/* { dg-final { scan-assembler "addw.*#546" } } */
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c
@@ -0,0 +1,28 @@
+/* Ensure negated/inverted replicated constant immediates work.  */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+  return a | 0xffffff00;
+}
+
+/* { dg-final { scan-assembler "orn.*#255" } } */
+
+int
+foo2 (int a)
+{
+  return a & 0xffeeffee;
+}
+
+/* { dg-final { scan-assembler "bic.*#1114129" } } */
+
+int
+foo3 (int a)
+{
+  return a & 0xaaaaaa00;
+}
+
+/* { dg-final { scan-assembler "and.*#-1431655766" } } */
+/* { dg-final { scan-assembler "bic.*#170" } } */
--- a/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c
+++ b/src/gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c
@@ -0,0 +1,22 @@
+/* Ensure replicated constants don't make things worse.  */
+/* { dg-options "-mthumb -O2" } */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+int
+foo1 (int a)
+{
+  /* It might be tempting to use 0x01000100, but it wouldn't help. */
+  return a + 0x01f001e0;
+}
+
+/* { dg-final { scan-assembler "add.*#32505856" } } */
+/* { dg-final { scan-assembler "add.*#480" } } */
+
+int
+foo2 (int a)
+{
+  return a + 0x0f100e10;
+}
+
+/* { dg-final { scan-assembler "add.*#252706816" } } */
+/* { dg-final { scan-assembler "add.*#3600" } } */
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+void unknown_alignment (char *dest, char *src)
+{
+  memcpy (dest, src, 15);
+}
+
+/* We should see three unaligned word loads and store pairs, one unaligned
+   ldrh/strh pair, and an ldrb/strb pair.  Sanity check that.  */
+
+/* { dg-final { scan-assembler-times "@ unaligned" 8 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char dest[16];
+
+void aligned_dest (char *src)
+{
+  memcpy (dest, src, 15);
+}
+
+/* Expect a multi-word store for the main part of the copy, but subword
+   loads/stores for the remainder.  */
+
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char src[16];
+
+void aligned_src (char *dest)
+{
+  memcpy (dest, src, 15);
+}
+
+/* Expect a multi-word load for the main part of the copy, but subword
+   loads/stores for the remainder.  */
+
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
+/* { dg-final { scan-assembler-times "ldrh" 1 } } */
+/* { dg-final { scan-assembler-times "strh" 1 } } */
+/* { dg-final { scan-assembler-times "ldrb" 1 } } */
+/* { dg-final { scan-assembler-times "strb" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
+++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_unaligned } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+
+char src[16];
+char dest[16];
+
+void aligned_both (void)
+{
+  memcpy (dest, src, 15);
+}
+
+/* We know both src and dest to be aligned: expect multiword loads/stores.  */
+
+/* { dg-final { scan-assembler-times "ldmia" 1 } } */
+/* { dg-final { scan-assembler-times "stmia" 1 } } */
--- a/src/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/unsigned-extend-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6" } */
+
+unsigned char foo (unsigned char c)
+{
+  return (c >= '0') && (c <= '9');
+}
+
+/* { dg-final { scan-assembler-not "uxtb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c
@@ -127,13 +127,13 @@
 
 void test_ldst (float f[], double d[]) {
   /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */
-  /* { dg-final { scan-assembler "flds.+ \\\[r0, #-1020\\\]" } } */
+  /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
   /* { dg-final { scan-assembler "add.+ r0, #1024" } } */
-  /* { dg-final { scan-assembler "fsts.+ \\\[r0, #0\\\]\n" } } */
+  /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\], #0\\\]\n" } } */
   f[256] = f[255] + f[-255];
 
   /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */
-  /* { dg-final { scan-assembler "fldd.+ \\\[r1, #-1016\\\]" } } */
+  /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
   /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */
   d[32] = d[127] + d[-127];
 }
--- a/src/gcc/testsuite/gcc.target/arm/wmul-10.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-10.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+unsigned long long
+foo (unsigned short a, unsigned short *b, unsigned short *c)
+{
+  return (unsigned)a + (unsigned long long)*b * (unsigned long long)*c;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-11.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-11.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (int *b)
+{
+  return 10 * (long long)*b;
+}
+
+/* { dg-final { scan-assembler "smull" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-12.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (int *b, int *c)
+{
+  long long tmp = (long long)*b * *c;
+  return 10 + tmp;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-13.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-13.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (int *a, int *b)
+{
+  return *a + (long long)*b * 10;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-5.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, char *b, char *c)
+{
+  return a + *b * *c;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-6.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-7.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-7.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+unsigned long long
+foo (unsigned long long a, unsigned char *b, unsigned short *c)
+{
+  return a + *b * *c;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-8.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-8.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, int *b, int *c)
+{
+  return a + (long long)*b * *c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-9.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-9.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, short *b, char *c)
+{
+  return a + *b * *c;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+struct bf
+{
+  int a : 3;
+  int b : 15;
+  int c : 3;
+};
+
+long long
+foo (long long a, struct bf b, struct bf c)
+{
+  return a + b.b * c.b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c
+++ b/src/gcc/testsuite/gcc.target/arm/wmul-bitfield-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+struct bf
+{
+  int a : 3;
+  unsigned int b : 15;
+  int c : 3;
+};
+
+long long
+foo (long long a, struct bf b, struct bf c)
+{
+  return a + b.b * c.c;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/src/gcc/testsuite/gcc.target/arm/xor-and.c
+++ b/src/gcc/testsuite/gcc.target/arm/xor-and.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv6" } */
+
+unsigned short foo (unsigned short x)
+{
+  x ^= 0x4002;
+  x >>= 1;
+  x |= 0x8000;
+  return x;
+}
+
+/* { dg-final { scan-assembler "orr" } } */
+/* { dg-final { scan-assembler-not "mvn" } } */
+/* { dg-final { scan-assembler-not "uxth" } } */
--- a/src/gcc/testsuite/gcc.target/i386/pr51987.c
+++ b/src/gcc/testsuite/gcc.target/i386/pr51987.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/51987 */
+/* { dg-do run { target { ! { ilp32 } } } } */
+/* { dg-options "-O3" } */
+
+extern void abort (void);
+union U { unsigned long long l; struct { unsigned int l, h; } i; };
+
+__attribute__((noinline, noclone)) void
+foo (char *x, char *y)
+{
+  int i;
+  for (i = 0; i < 64; i++)
+    {
+      union U u;
+      asm ("movl %1, %k0; salq $32, %0" : "=r" (u.l) : "r" (i));
+      x[i] = u.i.h;
+      union U v;
+      asm ("movl %1, %k0; salq $32, %0" : "=r" (v.l) : "r" (i));
+      y[i] = v.i.h;
+    }
+}
+
+int
+main ()
+{
+  char a[64], b[64];
+  int i;
+  foo (a, b);
+  for (i = 0; i < 64; i++)
+    if (a[i] != i || b[i] != i)
+      abort ();
+  return 0;
+}
--- a/src/gcc/testsuite/gcc.target/sparc/ultrasp12.c
+++ b/src/gcc/testsuite/gcc.target/sparc/ultrasp12.c
@@ -0,0 +1,64 @@
+/* PR rtl-optimization/48830 */
+/* Testcase by Hans-Peter Nilsson <hp@gcc.gnu.org> */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned long int uint64_t;
+typedef unsigned long int uintmax_t;
+typedef unsigned char rc_vec_t __attribute__((__vector_size__(8)));
+typedef short rc_svec_type_ __attribute__((__vector_size__(8)));
+typedef unsigned char rc_vec4_type_ __attribute__((__vector_size__(4)));
+
+void
+rc_stat_xsum_acc(const uint8_t *__restrict src1, int src1_dim,
+                 const uint8_t *__restrict src2, int src2_dim,
+                 int len, int height, uintmax_t sum[5])
+{
+    uint32_t s1 = 0;
+    uint32_t s2 = 0;
+    uintmax_t s11 = 0;
+    uintmax_t s22 = 0;
+    uintmax_t s12 = 0;
+    int full = len / ((1024) < (1024) ? (1024) : (1024));
+    int rem = len % ((1024) < (1024) ? (1024) : (1024));
+    int rem1 = rem / 1;
+    int y;
+    unsigned int rc_gsr_scale_ __attribute__ ((__unused__)) = 7; unsigned int rc_gsr_align_ __attribute__ ((__unused__)) = 4; unsigned int rc_gsr_set_ __attribute__ ((__unused__)) = 0; register unsigned int rc_gsr_fakedep_ __attribute__ ((__unused__)) = 0; unsigned int rc_gsr_ldinit_ __attribute__ ((__unused__)) = 0;
+    for (y = 0; y < height; y++) {
+        rc_vec_t a1, a2, a11, a22, a12;
+        int i1 = (y)*(src1_dim);
+        int i2 = (y)*(src2_dim);
+        int x;
+        ((a1) = ((rc_vec_t) {0}));
+        ((a2) = ((rc_vec_t) {0}));
+        ((a11) = ((rc_vec_t) {0}));
+        ((a22) = ((rc_vec_t) {0}));
+        ((a12) = ((rc_vec_t) {0}));
+        for (x = 0; x < full; x++) {
+            int k;
+            for (k = 0; k < ((1024) < (1024) ? (1024) : (1024)) /
+                            1; k++)
+            {
+                do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0);
+
+            }
+            do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0);
+        }
+        for (x = 0; x < rem1; x++) {
+            do { rc_vec_t v1, v2; ((v1) = *(const rc_vec_t*)(&(src1)[i1])); ((v2) = *(const rc_vec_t*)(&(src2)[i2])); ((a1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v1, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)))).v)); ((a2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(__builtin_vis_pdist (v2, ((rc_vec_t) {0}), (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)))).v)); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v1); rc_vec_t accvin_ = (a11); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a11) = accvout_; } while (0); do { rc_vec_t s1_ = (v2); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a22); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a22) = accvout_; } while (0); do { rc_vec_t s1_ = (v1); rc_vec_t s2_ = (v2); rc_vec_t accvin_ = (a12); rc_vec_t s1lo7_, s1msb_, accvout_; uint32_t maclo_, machi_; rc_svec_type_ masklow_ = (rc_svec_type_){(255), (255), (255), (255)}; rc_svec_type_ s1msbhi_, s1msblo_, s1lo7hi_, s1lo7lo_; rc_svec_type_ s1msbdiv2hi_, s1msbdiv2lo_; rc_vec4_type_ s1lo7hi4_, s1lo7lo4_, s1msbhi4_, s1msblo4_; rc_vec4_type_ s1msbdiv2hi4_, s1msbdiv2lo4_, s2hi4_, s2lo4_; rc_vec4_type_ accvhi4_, accvlo4_; rc_svec_type_ mulhilo7_, mullolo7_, mulhimsbdiv2_, mullomsbdiv2_; rc_svec_type_ mulhi_, mullo_, mulhihi_, mullohi_; rc_svec_type_ mulhilo_, mullolo_; rc_vec4_type_ zero4_ = (((union { rc_vec4_type_ v; uint64_t i; })(uint64_t)(0)).v); rc_vec_t msb_ = (rc_vec_t){(0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80), (0x80)}; ((s1msb_) = (s1_) & (msb_)); ((s1lo7_) = (s1_) & (~msb_)); do { if (rc_gsr_ldinit_) { extern void rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(void); rc_mixing_GSR_setting_with_RC_VEC_LDINIT_(); } if (!__builtin_constant_p(rc_gsr_align_) || !__builtin_constant_p(2) || !rc_gsr_set_ || (unsigned) (rc_gsr_align_) != rc_gsr_align_ || (unsigned) (2) != rc_gsr_scale_) { rc_gsr_set_ = 1; rc_gsr_align_ = (rc_gsr_align_); rc_gsr_scale_ = (2); unsigned int val_ = (rc_gsr_scale_ << 3) | rc_gsr_align_; if (__builtin_constant_p (val_)) { __asm__("wr %%g0,%[gsrval],%%gsr\n" ";# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "i" (val_), "1" (rc_gsr_fakedep_)); } else { __asm__("wr %[gsrval],0,%%gsr" "\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_), [fakegsr] "=rm" (rc_gsr_fakedep_) : "0" (s1msb_), [gsrval] "r" (val_), "1" (rc_gsr_fakedep_)); } } else { __asm__("\n;# dep %[depvec] on fake GSR %[fakegsr]" : [depvec] "=brm" (s1msb_) : "0" (s1msb_), [fakegsr] "g" (rc_gsr_fakedep_)); } } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1msb_); (s1msbhi4_) = hl_.hilo_.hi_; (s1msblo4_) = hl_.hilo_.lo_; } while (0); s1msbhi_ = __builtin_vis_fexpand(s1msbhi4_); s1msblo_ = __builtin_vis_fexpand(s1msblo4_); s1msbdiv2hi4_ = __builtin_vis_fpack16(s1msbhi_); s1msbdiv2lo4_ = __builtin_vis_fpack16(s1msblo_); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s2_); (s2hi4_) = hl_.hilo_.hi_; (s2lo4_) = hl_.hilo_.lo_; } while (0); do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (s1lo7_); (s1lo7hi4_) = hl_.hilo_.hi_; (s1lo7lo4_) = hl_.hilo_.lo_; } while (0); s1msbdiv2hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2hi4_, zero4_); s1msbdiv2lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1msbdiv2lo4_, zero4_); s1lo7hi_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7hi4_, zero4_); s1lo7lo_ = (rc_svec_type_)__builtin_vis_fpmerge(s1lo7lo4_, zero4_); mulhilo7_ = __builtin_vis_fmul8x16(s2hi4_, s1lo7hi_); mullolo7_ = __builtin_vis_fmul8x16(s2lo4_, s1lo7lo_); mulhimsbdiv2_ = __builtin_vis_fmul8x16(s2hi4_, s1msbdiv2hi_); mullomsbdiv2_ = __builtin_vis_fmul8x16(s2lo4_, s1msbdiv2lo_); mulhi_ = mulhilo7_ + mulhimsbdiv2_ + mulhimsbdiv2_; mullo_ = mullolo7_ + mullomsbdiv2_ + mullomsbdiv2_; mulhihi_ = mulhi_ & ~masklow_; mulhilo_ = mulhi_ & masklow_; mullohi_ = mullo_ & ~masklow_; mullolo_ = mullo_ & masklow_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (accvin_); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); maclo_ = __builtin_vis_pdist ((rc_vec_t)mullolo_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i)); maclo_ = __builtin_vis_pdist ((rc_vec_t)mulhilo_, ((rc_vec_t) {0}), maclo_); machi_ = __builtin_vis_pdist ((rc_vec_t)mullohi_, ((rc_vec_t) {0}), (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i)); machi_ = __builtin_vis_pdist ((rc_vec_t)mulhihi_, ((rc_vec_t) {0}), machi_); do { typedef union { struct { rc_vec4_type_ hi_, lo_; } hilo_; rc_vec_t v_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) {{((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)machi_)).v)), ((((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)((uint32_t)maclo_)).v))}}; (accvout_) = hl_.v_; } while (0); __asm__("\n;# dep fake GSR %[fakegsr] on %[xdep]" : [fakegsr] "=brm" (rc_gsr_fakedep_) : [xdep] "brm" (accvout_), "0" (rc_gsr_fakedep_)); (a12) = accvout_; } while (0); (i1) += 8; (i2) += 8; } while (0);
+        }
+        do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0);
+
+        do { uint32_t t1, t2, t11, t22, t12; ((t1) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a1)).i)); ((t2) = (((union { rc_vec_t v; uint64_t i; })(uint64_t)(a2)).i)); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a11); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t11) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a22); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t22) = maclo_ + machi_ * 256; } while (0); do { rc_vec4_type_ accvhi4_, accvlo4_; uint64_t machi_, maclo_; do { typedef union { rc_vec_t v_; struct { rc_vec4_type_ hi_, lo_; } hilo_; } RC_hl_type_; RC_hl_type_ hl_ = (RC_hl_type_) (a12); (accvhi4_) = hl_.hilo_.hi_; (accvlo4_) = hl_.hilo_.lo_; } while (0); machi_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvhi4_)).i); maclo_ = (((union { rc_vec4_type_ v; uint32_t i; })(uint32_t)(accvlo4_)).i); (t12) = maclo_ + machi_ * 256; } while (0); ((a1) = ((rc_vec_t) {0})); ((a2) = ((rc_vec_t) {0})); ((a11) = ((rc_vec_t) {0})); ((a22) = ((rc_vec_t) {0})); ((a12) = ((rc_vec_t) {0})); (s1) += t1; (s2) += t2; (s11) += t11; (s22) += t22; (s12) += t12; } while (0);
+    }
+    sum[0] = s1;
+    sum[1] = s2;
+    sum[2] = s11;
+    sum[3] = s22;
+    sum[4] = s12;
+    ;
+}
--- a/src/gcc/testsuite/gfortran.dg/vect/pr19049.f90
+++ b/src/gcc/testsuite/gfortran.dg/vect/pr19049.f90
@@ -19,6 +19,7 @@
       end
 
 ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } }
-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } }
 ! { dg-final { cleanup-tree-dump "vect" } }
 
--- a/src/gcc/testsuite/lib/target-supports.exp
+++ b/src/gcc/testsuite/lib/target-supports.exp
@@ -1894,6 +1894,18 @@
     }]
 }
 
+# Return 1 if this is an ARM target that supports unaligned word/halfword
+# load/store instructions.
+
+proc check_effective_target_arm_unaligned { } {
+    return [check_no_compiler_messages arm_unaligned assembly {
+	#ifndef __ARM_FEATURE_UNALIGNED
+	#error no unaligned support
+	#endif
+	int i;
+    }]
+}
+
 # Add the options needed for NEON.  We need either -mfloat-abi=softfp
 # or -mfloat-abi=hard, but if one is already specified by the
 # multilib, use it.  Similarly, if a -mfpu option already enables
@@ -1988,6 +2000,47 @@
 		check_effective_target_arm_fp16_ok_nocache]
 }
 
+# Creates a series of routines that return 1 if the given architecture
+# can be selected and a routine to give the flags to select that architecture
+# Note: Extra flags may be added to disable options from newer compilers
+# (Thumb in particular - but others may be added in the future)
+# Usage: /* { dg-require-effective-target arm_arch_v5_ok } */
+#        /* { dg-add-options arm_arch_v5 } */
+foreach { armfunc armflag armdef } { v5 "-march=armv5 -marm" __ARM_ARCH_5__
+				     v6 "-march=armv6" __ARM_ARCH_6__
+				     v6k "-march=armv6k" __ARM_ARCH_6K__
+				     v7a "-march=armv7-a" __ARM_ARCH_7A__ } {
+    eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
+	proc check_effective_target_arm_arch_FUNC_ok { } {
+	    if { [ string match "*-marm*" "FLAG" ] &&
+		![check_effective_target_arm_arm_ok] } {
+		return 0
+	    }
+	    return [check_no_compiler_messages arm_arch_FUNC_ok assembly {
+		#if !defined (DEF)
+		#error FOO
+		#endif
+	    } "FLAG" ]
+	}
+
+	proc add_options_for_arm_arch_FUNC { flags } {
+	    return "$flags FLAG"
+	}
+    }]
+}
+
+# Return 1 if this is an ARM target where -marm causes ARM to be
+# used (not Thumb)
+
+proc check_effective_target_arm_arm_ok { } {
+    return [check_no_compiler_messages arm_arm_ok assembly {
+	#if !defined (__arm__) || defined (__thumb__) || defined (__thumb2__)
+	#error FOO
+	#endif
+    } "-marm"]
+}
+
+
 # Return 1 is this is an ARM target where -mthumb causes Thumb-1 to be
 # used.
 
@@ -2338,6 +2391,26 @@
 }
 
 
+# Return 1 if the target supports hardware vector shift operation for char.
+
+proc check_effective_target_vect_shift_char { } {
+    global et_vect_shift_char_saved
+
+    if [info exists et_vect_shift_char_saved] {
+	verbose "check_effective_target_vect_shift_char: using cached result" 2
+    } else {
+	set et_vect_shift_char_saved 0
+	if { ([istarget powerpc*-*-*]
+             && ![istarget powerpc-*-linux*paired*])
+	     || [check_effective_target_arm32] } {
+	   set et_vect_shift_char_saved 1
+	}
+    }
+
+    verbose "check_effective_target_vect_shift_char: returning $et_vect_shift_char_saved" 2
+    return $et_vect_shift_char_saved
+}
+
 # Return 1 if the target supports hardware vectors of long, 0 otherwise.
 #
 # This can change for different subtargets so do not cache the result.
@@ -2673,7 +2746,8 @@
 	} else {
 	    set et_vect_widen_mult_qi_to_hi_saved 0
 	}
-        if { [istarget powerpc*-*-*] } {
+        if { [istarget powerpc*-*-*]
+              || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
             set et_vect_widen_mult_qi_to_hi_saved 1
         }
     }
@@ -2706,7 +2780,8 @@
 	      || [istarget spu-*-*]
 	      || [istarget ia64-*-*]
 	      || [istarget i?86-*-*]
-	      || [istarget x86_64-*-*] } {
+	      || [istarget x86_64-*-*]
+              || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
             set et_vect_widen_mult_hi_to_si_saved 1
         }
     }
@@ -2715,6 +2790,72 @@
 }
 
 # Return 1 if the target plus current options supports a vector
+# widening multiplication of *char* args into *short* result, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } {
+    global et_vect_widen_mult_qi_to_hi_pattern
+
+    if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] {
+        verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2
+    } else {
+        set et_vect_widen_mult_qi_to_hi_pattern_saved 0
+        if { [istarget powerpc*-*-*]
+              || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+            set et_vect_widen_mult_qi_to_hi_pattern_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2
+    return $et_vect_widen_mult_qi_to_hi_pattern_saved
+}
+
+# Return 1 if the target plus current options supports a vector
+# widening multiplication of *short* args into *int* result, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } {
+    global et_vect_widen_mult_hi_to_si_pattern
+
+    if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] {
+        verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2
+    } else {
+        set et_vect_widen_mult_hi_to_si_pattern_saved 0
+        if { [istarget powerpc*-*-*]
+              || [istarget spu-*-*]
+              || [istarget ia64-*-*]
+              || [istarget i?86-*-*]
+              || [istarget x86_64-*-*]
+              || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+            set et_vect_widen_mult_hi_to_si_pattern_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2
+    return $et_vect_widen_mult_hi_to_si_pattern_saved
+}
+
+# Return 1 if the target plus current options supports a vector
+# widening shift, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_widen_shift { } {
+    global et_vect_widen_shift_saved
+
+    if [info exists et_vect_shift_saved] {
+        verbose "check_effective_target_vect_widen_shift: using cached result" 2
+    } else {
+        set et_vect_widen_shift_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+            set et_vect_widen_shift_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2
+    return $et_vect_widen_shift_saved
+}
+
+# Return 1 if the target plus current options supports a vector
 # dot-product of signed chars, 0 otherwise.
 #
 # This won't change for different subtargets so cache the result.
@@ -3170,29 +3311,6 @@
     return $et_vect_extract_even_odd_saved
 }
 
-# Return 1 if the target supports vector even/odd elements extraction of
-# vectors with SImode elements or larger, 0 otherwise.
-
-proc check_effective_target_vect_extract_even_odd_wide { } {
-    global et_vect_extract_even_odd_wide_saved
-    
-    if [info exists et_vect_extract_even_odd_wide_saved] {
-        verbose "check_effective_target_vect_extract_even_odd_wide: using cached result" 2
-    } else {
-        set et_vect_extract_even_odd_wide_saved 0 
-        if { [istarget powerpc*-*-*] 
-             || [istarget i?86-*-*]
-             || [istarget x86_64-*-*]
-             || [istarget ia64-*-*]
-             || [istarget spu-*-*] } {
-           set et_vect_extract_even_odd_wide_saved 1
-        }
-    }
-
-    verbose "check_effective_target_vect_extract_even_wide_odd: returning $et_vect_extract_even_odd_wide_saved" 2
-    return $et_vect_extract_even_odd_wide_saved
-}
-
 # Return 1 if the target supports vector interleaving, 0 otherwise.
 
 proc check_effective_target_vect_interleave { } {
@@ -3215,41 +3333,66 @@
     return $et_vect_interleave_saved
 }
 
-# Return 1 if the target supports vector interleaving and extract even/odd, 0 otherwise.
-proc check_effective_target_vect_strided { } {
-    global et_vect_strided_saved
+foreach N {2 3 4 8} {
+    eval [string map [list N $N] {
+	# Return 1 if the target supports 2-vector interleaving
+	proc check_effective_target_vect_stridedN { } {
+	    global et_vect_stridedN_saved
 
-    if [info exists et_vect_strided_saved] {
-        verbose "check_effective_target_vect_strided: using cached result" 2
+	    if [info exists et_vect_stridedN_saved] {
+		verbose "check_effective_target_vect_stridedN: using cached result" 2
+	    } else {
+		set et_vect_stridedN_saved 0
+		if { (N & -N) == N
+		     && [check_effective_target_vect_interleave]
+		     && [check_effective_target_vect_extract_even_odd] } {
+		    set et_vect_stridedN_saved 1
+		}
+		if { [istarget arm*-*-*] && N >= 2 && N <= 4 } {
+		    set et_vect_stridedN_saved 1
+		}
+	    }
+
+	    verbose "check_effective_target_vect_stridedN: returning $et_vect_stridedN_saved" 2
+	    return $et_vect_stridedN_saved
+	}
+    }]
+}
+
+# Return 1 if the target supports multiple vector sizes
+
+proc check_effective_target_vect_multiple_sizes { } {
+    global et_vect_multiple_sizes_saved
+
+    if [info exists et_vect_multiple_sizes_saved] {
+        verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
     } else {
-        set et_vect_strided_saved 0
-        if { [check_effective_target_vect_interleave]
-             && [check_effective_target_vect_extract_even_odd] } {
-           set et_vect_strided_saved 1
+        set et_vect_multiple_sizes_saved 0
+          if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+           set et_vect_multiple_sizes_saved 1
         }
     }
 
-    verbose "check_effective_target_vect_strided: returning $et_vect_strided_saved" 2
-    return $et_vect_strided_saved
+    verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2
+    return $et_vect_multiple_sizes_saved
 }
 
-# Return 1 if the target supports vector interleaving and extract even/odd
-# for wide element types, 0 otherwise.
-proc check_effective_target_vect_strided_wide { } {
-    global et_vect_strided_wide_saved
+# Return 1 if the target supports vectors of 64 bits.
+
+proc check_effective_target_vect64 { } {
+    global et_vect64_saved
 
-    if [info exists et_vect_strided_wide_saved] {
-        verbose "check_effective_target_vect_strided_wide: using cached result" 2
+    if [info exists et_vect64_saved] {
+        verbose "check_effective_target_vect64: using cached result" 2
     } else {
-        set et_vect_strided_wide_saved 0
-        if { [check_effective_target_vect_interleave]
-             && [check_effective_target_vect_extract_even_odd_wide] } {
-           set et_vect_strided_wide_saved 1
+        set et_vect64_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+           set et_vect64_saved 1
         }
     }
 
-    verbose "check_effective_target_vect_strided_wide: returning $et_vect_strided_wide_saved" 2
-    return $et_vect_strided_wide_saved
+    verbose "check_effective_target_vect64: returning $et_vect64_saved" 2
+    return $et_vect64_saved
 }
 
 # Return 1 if the target supports section-anchors
@@ -3302,6 +3445,31 @@
     return $et_sync_int_long_saved
 }
 
+# Return 1 if the target supports atomic operations on "long long" and can
+# execute them
+# So far only put checks in for ARM, others may want to add their own
+proc check_effective_target_sync_longlong { } {
+    return [check_runtime sync_longlong_runtime {
+      #include <stdlib.h>
+      int main ()
+      {
+	long long l1;
+
+	if (sizeof (long long) != 8)
+	  exit (1);
+
+      #ifdef __arm__
+	/* Just check for native; checking for kernel fallback is tricky.  */
+	asm volatile ("ldrexd r0,r1, [%0]" : : "r" (&l1) : "r0", "r1");
+      #else
+      # error "Add other suitable archs here"
+      #endif
+
+	exit (0);
+      }
+    } "" ]
+}
+
 # Return 1 if the target supports atomic operations on "char" and "short".
 
 proc check_effective_target_sync_char_short { } {
@@ -3635,11 +3803,11 @@
     return $flags
 }
 
-# Add to FLAGS the flags needed to enable 128-bit vectors.
+# Add to FLAGS the flags needed to enable 64-bit vectors.
 
-proc add_options_for_quad_vectors { flags } {
+proc add_options_for_double_vectors { flags } {
     if [is-effective-target arm_neon_ok] {
-	return "$flags -mvectorize-with-neon-quad"
+	return "$flags -mvectorize-with-neon-double"
     }
 
     return $flags
--- a/src/gcc/tree-affine.c
+++ b/src/gcc/tree-affine.c
@@ -887,3 +887,30 @@
   *size = shwi_to_double_int ((bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT);
 }
 
+/* Returns true if a region of size SIZE1 at position 0 and a region of
+   size SIZE2 at position DIFF cannot overlap.  */
+
+bool
+aff_comb_cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2)
+{
+  double_int d, bound;
+
+  /* Unless the difference is a constant, we fail.  */
+  if (diff->n != 0)
+    return false;
+
+  d = diff->offset;
+  if (double_int_negative_p (d))
+    {
+      /* The second object is before the first one, we succeed if the last
+	 element of the second object is before the start of the first one.  */
+      bound = double_int_add (d, double_int_add (size2, double_int_minus_one));
+      return double_int_negative_p (bound);
+    }
+  else
+    {
+      /* We succeed if the second object starts after the first one ends.  */
+      return double_int_scmp (size1, d) <= 0;
+    }
+}
+
--- a/src/gcc/tree-affine.h
+++ b/src/gcc/tree-affine.h
@@ -76,6 +76,7 @@
 				     struct pointer_map_t **);
 void get_inner_reference_aff (tree, aff_tree *, double_int *);
 void free_affine_expand_cache (struct pointer_map_t **);
+bool aff_comb_cannot_overlap_p (aff_tree *, double_int, double_int);
 
 /* Debugging functions.  */
 void print_aff (FILE *, aff_tree *);
--- a/src/gcc/tree.c
+++ b/src/gcc/tree.c
@@ -7321,6 +7321,15 @@
   return build_array_type_1 (elt_type, index_type, false);
 }
 
+/* Return a representation of ELT_TYPE[NELTS], using indices of type
+   sizetype.  */
+
+tree
+build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts)
+{
+  return build_array_type (elt_type, build_index_type (size_int (nelts - 1)));
+}
+
 /* Recursively examines the array elements of TYPE, until a non-array
    element type is found.  */
 
--- a/src/gcc/tree-cfg.c
+++ b/src/gcc/tree-cfg.c
@@ -3046,7 +3046,26 @@
   tree fntype;
   unsigned i;
 
-  if (TREE_CODE (fn) != OBJ_TYPE_REF
+  if (gimple_call_internal_p (stmt))
+    {
+      if (fn)
+	{
+	  error ("gimple call has two targets");
+	  debug_generic_stmt (fn);
+	  return true;
+	}
+    }
+  else
+    {
+      if (!fn)
+	{
+	  error ("gimple call has no target");
+	  return true;
+	}
+    }
+
+  if (fn
+      && TREE_CODE (fn) != OBJ_TYPE_REF
       && !is_gimple_val (fn))
     {
       error ("invalid function in gimple call");
@@ -3054,9 +3073,10 @@
       return true;
     }
 
-  if (!POINTER_TYPE_P (TREE_TYPE  (fn))
-      || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE
-	  && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE))
+  if (fn
+      && (!POINTER_TYPE_P (TREE_TYPE  (fn))
+	  || (TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != FUNCTION_TYPE
+	      && TREE_CODE (TREE_TYPE (TREE_TYPE (fn))) != METHOD_TYPE)))
     {
       error ("non-function in gimple call");
       return true;
@@ -3076,8 +3096,12 @@
       return true;
     }
 
-  fntype = TREE_TYPE (TREE_TYPE (fn));
-  if (gimple_call_lhs (stmt)
+  if (fn)
+    fntype = TREE_TYPE (TREE_TYPE (fn));
+  else
+    fntype = NULL_TREE;
+  if (fntype
+      && gimple_call_lhs (stmt)
       && !useless_type_conversion_p (TREE_TYPE (gimple_call_lhs (stmt)),
 				     TREE_TYPE (fntype))
       /* ???  At least C++ misses conversions at assignments from
@@ -3449,6 +3473,44 @@
 	return false;
       }
 
+    case WIDEN_LSHIFT_EXPR:
+      {
+        if (!INTEGRAL_TYPE_P (lhs_type)
+            || !INTEGRAL_TYPE_P (rhs1_type)
+            || TREE_CODE (rhs2) != INTEGER_CST
+            || (2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)))
+          {
+            error ("type mismatch in widening vector shift expression");
+            debug_generic_expr (lhs_type);
+            debug_generic_expr (rhs1_type);
+            debug_generic_expr (rhs2_type);
+            return true;
+          }
+
+        return false;
+      }
+
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
+      {
+        if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+            || TREE_CODE (lhs_type) != VECTOR_TYPE
+            || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type))
+            || !INTEGRAL_TYPE_P (TREE_TYPE (lhs_type))
+            || TREE_CODE (rhs2) != INTEGER_CST
+            || (2 * TYPE_PRECISION (TREE_TYPE (rhs1_type))
+                > TYPE_PRECISION (TREE_TYPE (lhs_type))))
+          {
+            error ("type mismatch in widening vector shift expression");
+            debug_generic_expr (lhs_type);
+            debug_generic_expr (rhs1_type);
+            debug_generic_expr (rhs2_type);
+            return true;
+          }
+
+        return false;
+      }
+
     case PLUS_EXPR:
     case MINUS_EXPR:
       {
@@ -3550,7 +3612,7 @@
     case WIDEN_MULT_EXPR:
       if (TREE_CODE (lhs_type) != INTEGER_TYPE)
 	return true;
-      return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
+      return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))
 	      || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
 
     case WIDEN_SUM_EXPR:
@@ -3643,7 +3705,7 @@
 	   && !FIXED_POINT_TYPE_P (rhs1_type))
 	  || !useless_type_conversion_p (rhs1_type, rhs2_type)
 	  || !useless_type_conversion_p (lhs_type, rhs3_type)
-	  || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)
+	  || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)
 	  || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))
 	{
 	  error ("type mismatch in widening multiply-accumulate expression");
@@ -4130,9 +4192,10 @@
      didn't see a function declaration before the call.  */
   if (is_gimple_call (stmt))
     {
-      tree decl;
+      tree fn, decl;
 
-      if (!is_gimple_call_addr (gimple_call_fn (stmt)))
+      fn = gimple_call_fn (stmt);
+      if (fn && !is_gimple_call_addr (fn))
 	{
 	  error ("invalid function in call statement");
 	  return true;
@@ -7503,6 +7566,8 @@
 	case GIMPLE_CALL:
 	  if (gimple_call_lhs (g))
 	    break;
+	  if (gimple_call_internal_p (g))
+	    break;
 
 	  /* This is a naked call, as opposed to a GIMPLE_CALL with an
 	     LHS.  All calls whose value is ignored should be
--- a/src/gcc/tree-data-ref.c
+++ b/src/gcc/tree-data-ref.c
@@ -1,5 +1,5 @@
 /* Data references and dependences detectors.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
 
@@ -84,6 +84,7 @@
 #include "tree-scalar-evolution.h"
 #include "tree-pass.h"
 #include "langhooks.h"
+#include "tree-affine.h"
 
 static struct datadep_stats
 {
@@ -721,11 +722,11 @@
 }
 
 /* Analyzes the behavior of the memory reference DR in the innermost loop or
-   basic block that contains it. Returns true if analysis succeed or false
+   basic block that contains it.  Returns true if analysis succeed or false
    otherwise.  */
 
 bool
-dr_analyze_innermost (struct data_reference *dr)
+dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 {
   gimple stmt = DR_STMT (dr);
   struct loop *loop = loop_containing_stmt (stmt);
@@ -768,14 +769,25 @@
     }
   else
     base = build_fold_addr_expr (base);
+
   if (in_loop)
     {
       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
                       false))
         {
-          if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "failed: evolution of base is not affine.\n");
-          return false;
+          if (nest)
+            {
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                fprintf (dump_file, "failed: evolution of base is not"
+                                    " affine.\n");
+              return false;
+            }
+          else
+            {
+              base_iv.base = base;
+              base_iv.step = ssize_int (0);
+              base_iv.no_overflow = true;
+            }
         }
     }
   else
@@ -800,10 +812,18 @@
       else if (!simple_iv (loop, loop_containing_stmt (stmt),
                            poffset, &offset_iv, false))
         {
-          if (dump_file && (dump_flags & TDF_DETAILS))
-            fprintf (dump_file, "failed: evolution of offset is not"
-                                " affine.\n");
-          return false;
+          if (nest)
+            {
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                fprintf (dump_file, "failed: evolution of offset is not"
+                                    " affine.\n");
+              return false;
+            }
+          else
+            {
+              offset_iv.base = poffset;
+              offset_iv.step = ssize_int (0);
+            }
         }
     }
 
@@ -842,30 +862,30 @@
   tree base, off, access_fn = NULL_TREE;
   basic_block before_loop = NULL;
 
-  if (nest)
-    before_loop = block_before_loop (nest);
+  if (!nest)
+    {
+      DR_BASE_OBJECT (dr) = ref;
+      DR_ACCESS_FNS (dr) = NULL;
+      return;
+    }
+
+  before_loop = block_before_loop (nest);
 
   while (handled_component_p (aref))
     {
       if (TREE_CODE (aref) == ARRAY_REF)
 	{
 	  op = TREE_OPERAND (aref, 1);
-	  if (nest)
-	    {
-  	      access_fn = analyze_scalar_evolution (loop, op);
-	      access_fn = instantiate_scev (before_loop, loop, access_fn);
-	      VEC_safe_push (tree, heap, access_fns, access_fn);
-	    }
-
+  	  access_fn = analyze_scalar_evolution (loop, op);
+	  access_fn = instantiate_scev (before_loop, loop, access_fn);
+	  VEC_safe_push (tree, heap, access_fns, access_fn);
 	  TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
 	}
 
       aref = TREE_OPERAND (aref, 0);
     }
 
-  if (nest
-      && (INDIRECT_REF_P (aref)
-	  || TREE_CODE (aref) == MEM_REF))
+  if (INDIRECT_REF_P (aref) || TREE_CODE (aref) == MEM_REF)
     {
       op = TREE_OPERAND (aref, 0);
       access_fn = analyze_scalar_evolution (loop, op);
@@ -967,7 +987,7 @@
   DR_REF (dr) = memref;
   DR_IS_READ (dr) = is_read;
 
-  dr_analyze_innermost (dr);
+  dr_analyze_innermost (dr, nest);
   dr_analyze_indices (dr, nest, loop);
   dr_analyze_alias (dr);
 
@@ -991,6 +1011,48 @@
   return dr;
 }
 
+/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
+   expressions.  */
+static bool
+dr_equal_offsets_p1 (tree offset1, tree offset2)
+{
+  bool res;
+
+  STRIP_NOPS (offset1);
+  STRIP_NOPS (offset2);
+
+  if (offset1 == offset2)
+    return true;
+
+  if (TREE_CODE (offset1) != TREE_CODE (offset2)
+      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
+    return false;
+
+  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
+                             TREE_OPERAND (offset2, 0));
+
+  if (!res || !BINARY_CLASS_P (offset1))
+    return res;
+
+  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
+                             TREE_OPERAND (offset2, 1));
+
+  return res;
+}
+
+/* Check if DRA and DRB have equal offsets.  */
+bool
+dr_equal_offsets_p (struct data_reference *dra,
+                    struct data_reference *drb)
+{
+  tree offset1, offset2;
+
+  offset1 = DR_OFFSET (dra);
+  offset2 = DR_OFFSET (drb);
+
+  return dr_equal_offsets_p1 (offset1, offset2);
+}
+
 /* Returns true if FNA == FNB.  */
 
 static bool
@@ -1240,14 +1302,33 @@
 }
 
 /* Returns false if we can prove that data references A and B do not alias,
-   true otherwise.  */
+   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
+   considered.  */
 
 bool
-dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
+dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
+		bool loop_nest)
 {
   tree addr_a = DR_BASE_OBJECT (a);
   tree addr_b = DR_BASE_OBJECT (b);
 
+  /* If we are not processing a loop nest but scalar code we
+     do not need to care about possible cross-iteration dependences
+     and thus can process the full original reference.  Do so,
+     similar to how loop invariant motion applies extra offset-based
+     disambiguation.  */
+  if (!loop_nest)
+    {
+      aff_tree off1, off2;
+      double_int size1, size2;
+      get_inner_reference_aff (DR_REF (a), &off1, &size1);
+      get_inner_reference_aff (DR_REF (b), &off2, &size2);
+      aff_combination_scale (&off1, double_int_minus_one);
+      aff_combination_add (&off2, &off1);
+      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
+	return false;
+    }
+
   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
     return refs_output_dependent_p (addr_a, addr_b);
   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
@@ -1285,7 +1366,7 @@
     }
 
   /* If the data references do not alias, then they are independent.  */
-  if (!dr_may_alias_p (a, b))
+  if (!dr_may_alias_p (a, b, loop_nest != NULL))
     {
       DDR_ARE_DEPENDENT (res) = chrec_known;
       return res;
@@ -4162,7 +4243,7 @@
   if ((stmt_code == GIMPLE_CALL
        && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
       || (stmt_code == GIMPLE_ASM
-	  && gimple_asm_volatile_p (stmt)))
+	  && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
     clobbers_memory = true;
 
   if (!gimple_vuse (stmt))
@@ -4294,7 +4375,7 @@
    DATAREFS.  Returns chrec_dont_know when failing to analyze a
    difficult case, returns NULL_TREE otherwise.  */
 
-static tree
+tree
 find_data_references_in_bb (struct loop *loop, basic_block bb,
                             VEC (data_reference_p, heap) **datarefs)
 {
@@ -5143,7 +5224,7 @@
   DR_STMT (dr) = stmt;
   DR_REF (dr) = op0;
 
-  res = dr_analyze_innermost (dr)
+  res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
     && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
 
   free_data_ref (dr);
@@ -5183,7 +5264,7 @@
 
   DR_STMT (dr) = stmt;
   DR_REF (dr) = *ref->pos;
-  dr_analyze_innermost (dr);
+  dr_analyze_innermost (dr, loop_containing_stmt (stmt));
   base_address = DR_BASE_ADDRESS (dr);
 
   if (!base_address)
--- a/src/gcc/tree-data-ref.h
+++ b/src/gcc/tree-data-ref.h
@@ -386,7 +386,7 @@
 DEF_VEC_ALLOC_O (data_ref_loc, heap);
 
 bool get_references_in_stmt (gimple, VEC (data_ref_loc, heap) **);
-bool dr_analyze_innermost (struct data_reference *);
+bool dr_analyze_innermost (struct data_reference *, struct loop *);
 extern bool compute_data_dependences_for_loop (struct loop *, bool,
 					       VEC (loop_p, heap) **,
 					       VEC (data_reference_p, heap) **,
@@ -426,10 +426,14 @@
 extern void compute_all_dependences (VEC (data_reference_p, heap) *,
 				     VEC (ddr_p, heap) **, VEC (loop_p, heap) *,
 				     bool);
+extern tree find_data_references_in_bb (struct loop *, basic_block,
+                                        VEC (data_reference_p, heap) **);
 
 extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
 extern bool dr_may_alias_p (const struct data_reference *,
-			    const struct data_reference *);
+			    const struct data_reference *, bool);
+extern bool dr_equal_offsets_p (struct data_reference *,
+                                struct data_reference *);
 
 
 /* Return true when the base objects of data references A and B are
--- a/src/gcc/tree.def
+++ b/src/gcc/tree.def
@@ -1092,6 +1092,19 @@
    is subtracted from t3.  */
 DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
 
+/* Widening shift left.
+   The first operand is of type t1.
+   The second operand is the number of bits to shift by; it need not be the
+   same type as the first operand and result.
+   Note that the result is undefined if the second operand is larger
+   than or equal to the first operand's type size.
+   The type of the entire expression is t2, such that t2 is at least twice
+   the size of t1.
+   WIDEN_LSHIFT_EXPR is equivalent to first widening (promoting)
+   the first argument from type t1 to type t2, and then shifting it
+   by the second argument.  */
+DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
+
 /* Fused multiply-add.
    All operands and the result are of the same type.  No intermediate
    rounding is performed after multiplying operand one with operand two
@@ -1147,6 +1160,16 @@
 DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2)
 DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2)
 
+/* Widening vector shift left in bits.
+   Operand 0 is a vector to be shifted with N elements of size S.
+   Operand 1 is an integer shift amount in bits.
+   The result of the operation is N elements of size 2*S.
+   VEC_WIDEN_LSHIFT_HI_EXPR computes the N/2 high results.
+   VEC_WIDEN_LSHIFT_LO_EXPR computes the N/2 low results.
+ */
+DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2)
+
 /* PREDICT_EXPR.  Specify hint for branch prediction.  The
    PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the
    outcome (0 for not taken and 1 for taken).  Once the profile is guessed
--- a/src/gcc/tree-eh.c
+++ b/src/gcc/tree-eh.c
@@ -2752,7 +2752,7 @@
       || gimple_call_lhs (twos)
       || gimple_call_chain (ones)
       || gimple_call_chain (twos)
-      || !operand_equal_p (gimple_call_fn (ones), gimple_call_fn (twos), 0)
+      || !gimple_call_same_target_p (ones, twos)
       || gimple_call_num_args (ones) != gimple_call_num_args (twos))
     return false;
 
--- a/src/gcc/tree.h
+++ b/src/gcc/tree.h
@@ -4197,6 +4197,7 @@
 extern tree build_index_type (tree);
 extern tree build_array_type (tree, tree);
 extern tree build_nonshared_array_type (tree, tree);
+extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT);
 extern tree build_function_type (tree, tree);
 extern tree build_function_type_list (tree, ...);
 extern tree build_function_type_skip_args (tree, bitmap);
@@ -4626,21 +4627,10 @@
 
 extern VEC(tree,gc) *ctor_to_vec (tree);
 
-/* Examine CTOR to discover:
-   * how many scalar fields are set to nonzero values,
-     and place it in *P_NZ_ELTS;
-   * how many scalar fields in total are in CTOR,
-     and place it in *P_ELT_COUNT.
-   * if a type is a union, and the initializer from the constructor
-     is not the largest element in the union, then set *p_must_clear.
+extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
+				      HOST_WIDE_INT *, bool *);
 
-   Return whether or not CTOR is a valid static constant initializer, the same
-   as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0".  */
-
-extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *,
-				      bool *);
-
-extern HOST_WIDE_INT count_type_elements (const_tree, bool);
+extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree);
 
 /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0.  */
 
--- a/src/gcc/tree-if-conv.c
+++ b/src/gcc/tree-if-conv.c
@@ -464,8 +464,8 @@
 /* Returns true when the memory references of STMT are read or written
    unconditionally.  In other words, this function returns true when
    for every data reference A in STMT there exist other accesses to
-   the same data reference with predicates that add up (OR-up) to the
-   true predicate: this ensures that the data reference A is touched
+   a data reference with the same base with predicates that add up (OR-up) to
+   the true predicate: this ensures that the data reference A is touched
    (read or written) on every iteration of the if-converted loop.  */
 
 static bool
@@ -489,21 +489,38 @@
 	  continue;
 
 	for (j = 0; VEC_iterate (data_reference_p, drs, j, b); j++)
-	  if (DR_STMT (b) != stmt
-	      && same_data_refs (a, b))
-	    {
-	      tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
-
-	      if (DR_RW_UNCONDITIONALLY (b) == 1
-		  || is_true_predicate (cb)
-		  || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb),
-								 ca, cb)))
-		{
-		  DR_RW_UNCONDITIONALLY (a) = 1;
-		  DR_RW_UNCONDITIONALLY (b) = 1;
-		  found = true;
-		  break;
-		}
+          {
+            tree ref_base_a = DR_REF (a);
+            tree ref_base_b = DR_REF (b);
+
+            if (DR_STMT (b) == stmt)
+              continue;
+
+            while (TREE_CODE (ref_base_a) == COMPONENT_REF
+                   || TREE_CODE (ref_base_a) == IMAGPART_EXPR
+                   || TREE_CODE (ref_base_a) == REALPART_EXPR)
+              ref_base_a = TREE_OPERAND (ref_base_a, 0);
+
+            while (TREE_CODE (ref_base_b) == COMPONENT_REF
+                   || TREE_CODE (ref_base_b) == IMAGPART_EXPR
+                   || TREE_CODE (ref_base_b) == REALPART_EXPR)
+              ref_base_b = TREE_OPERAND (ref_base_b, 0);
+
+  	    if (!operand_equal_p (ref_base_a, ref_base_b, 0))
+	      {
+	        tree cb = bb_predicate (gimple_bb (DR_STMT (b)));
+
+	        if (DR_RW_UNCONDITIONALLY (b) == 1
+		    || is_true_predicate (cb)
+		    || is_true_predicate (ca
+                        = fold_or_predicates (EXPR_LOCATION (cb), ca, cb)))
+		  {
+		    DR_RW_UNCONDITIONALLY (a) = 1;
+  		    DR_RW_UNCONDITIONALLY (b) = 1;
+		    found = true;
+		    break;
+		  }
+               }
 	    }
 
 	if (!found)
--- a/src/gcc/tree-inline.c
+++ b/src/gcc/tree-inline.c
@@ -3343,6 +3343,7 @@
     case DOT_PROD_EXPR:
     case WIDEN_MULT_PLUS_EXPR:
     case WIDEN_MULT_MINUS_EXPR:
+    case WIDEN_LSHIFT_EXPR:
 
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
@@ -3357,6 +3358,8 @@
     case VEC_EXTRACT_ODD_EXPR:
     case VEC_INTERLEAVE_HIGH_EXPR:
     case VEC_INTERLEAVE_LOW_EXPR:
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
 
       return 1;
 
@@ -3474,10 +3477,13 @@
       {
 	tree decl = gimple_call_fndecl (stmt);
 	tree addr = gimple_call_fn (stmt);
-	tree funtype = TREE_TYPE (addr);
+	tree funtype = NULL_TREE;
 	bool stdarg = false;
 
-	if (POINTER_TYPE_P (funtype))
+	if (addr)
+	  funtype = TREE_TYPE (addr);
+
+	if (funtype && POINTER_TYPE_P (funtype))
 	  funtype = TREE_TYPE (funtype);
 
 	/* Do not special case builtins where we see the body.
@@ -3517,7 +3523,7 @@
 	if (decl)
 	  funtype = TREE_TYPE (decl);
 
-	if (!VOID_TYPE_P (TREE_TYPE (funtype)))
+	if (funtype && !VOID_TYPE_P (TREE_TYPE (funtype)))
 	  cost += estimate_move_cost (TREE_TYPE (funtype));
 
 	if (funtype)
--- a/src/gcc/tree-loop-distribution.c
+++ b/src/gcc/tree-loop-distribution.c
@@ -312,7 +312,7 @@
 
   DR_STMT (dr) = stmt;
   DR_REF (dr) = op0;
-  res = dr_analyze_innermost (dr);
+  res = dr_analyze_innermost (dr, loop_containing_stmt (stmt));
   gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
 
   nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
--- a/src/gcc/tree-predcom.c
+++ b/src/gcc/tree-predcom.c
@@ -1114,7 +1114,7 @@
   memset (&init_dr, 0, sizeof (struct data_reference));
   DR_REF (&init_dr) = init_ref;
   DR_STMT (&init_dr) = phi;
-  if (!dr_analyze_innermost (&init_dr))
+  if (!dr_analyze_innermost (&init_dr, loop))
     return NULL;
 
   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
--- a/src/gcc/tree-pretty-print.c
+++ b/src/gcc/tree-pretty-print.c
@@ -1543,6 +1543,7 @@
     case RROTATE_EXPR:
     case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
+    case WIDEN_LSHIFT_EXPR:
     case BIT_IOR_EXPR:
     case BIT_XOR_EXPR:
     case BIT_AND_EXPR:
@@ -2213,6 +2214,22 @@
       pp_string (buffer, " > ");
       break;
 
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+      pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
+      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_string (buffer, ", ");
+      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+      pp_string (buffer, " > ");
+      break;
+
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
+      pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
+      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_string (buffer, ", ");
+      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+      pp_string (buffer, " > ");
+      break;
+
     case VEC_UNPACK_HI_EXPR:
       pp_string (buffer, " VEC_UNPACK_HI_EXPR < ");
       dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
@@ -2535,6 +2552,9 @@
     case RSHIFT_EXPR:
     case LROTATE_EXPR:
     case RROTATE_EXPR:
+    case VEC_WIDEN_LSHIFT_HI_EXPR:
+    case VEC_WIDEN_LSHIFT_LO_EXPR:
+    case WIDEN_LSHIFT_EXPR:
       return 11;
 
     case WIDEN_SUM_EXPR:
@@ -2710,6 +2730,9 @@
     case VEC_RSHIFT_EXPR:
       return "v>>";
 
+    case WIDEN_LSHIFT_EXPR:
+      return "w<<";
+
     case POINTER_PLUS_EXPR:
       return "+";
 
--- a/src/gcc/tree-ssa-ccp.c
+++ b/src/gcc/tree-ssa-ccp.c
@@ -522,10 +522,6 @@
     val = bit_value_binop (PLUS_EXPR, TREE_TYPE (expr),
 			   TREE_OPERAND (base, 0), TREE_OPERAND (base, 1));
   else if (base
-	   /* ???  While function decls have DECL_ALIGN their addresses
-	      may encode extra information in the lower bits on some
-	      targets (PR47239).  Simply punt for function decls for now.  */
-	   && TREE_CODE (base) != FUNCTION_DECL
 	   && ((align = get_object_alignment (base, BIGGEST_ALIGNMENT))
 		> BITS_PER_UNIT))
     {
@@ -1279,7 +1275,10 @@
 
     case GIMPLE_CALL:
       {
-	tree fn = valueize_op (gimple_call_fn (stmt));
+	tree fn = gimple_call_fn (stmt);
+	if (!fn)
+	  return NULL_TREE;
+	fn = valueize_op (fn);
 	if (TREE_CODE (fn) == ADDR_EXPR
 	    && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL
 	    && DECL_BUILT_IN (TREE_OPERAND (fn, 0)))
@@ -2321,6 +2320,11 @@
 	    return true;
 	  }
 
+	/* Internal calls provide no argument types, so the extra laxity
+	   for normal calls does not apply.  */
+	if (gimple_call_internal_p (stmt))
+	  return false;
+
 	/* Propagate into the call arguments.  Compared to replace_uses_in
 	   this can use the argument slot types for type verification
 	   instead of the current argument type.  We also can safely
--- a/src/gcc/tree-ssa-dom.c
+++ b/src/gcc/tree-ssa-dom.c
@@ -64,7 +64,7 @@
     struct { enum tree_code op;  tree opnd; } unary;
     struct { enum tree_code op;  tree opnd0, opnd1; } binary;
     struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
-    struct { tree fn; bool pure; size_t nargs; tree *args; } call;
+    struct { gimple fn_from; bool pure; size_t nargs; tree *args; } call;
   } ops;
 };
 
@@ -257,7 +257,7 @@
 
       expr->type = TREE_TYPE (gimple_call_lhs (stmt));
       expr->kind = EXPR_CALL;
-      expr->ops.call.fn = gimple_call_fn (stmt);
+      expr->ops.call.fn_from = stmt;
 
       if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))
         expr->ops.call.pure = true;
@@ -421,8 +421,8 @@
 
         /* If the calls are to different functions, then they
            clearly cannot be equal.  */
-        if (! operand_equal_p (expr0->ops.call.fn,
-                               expr1->ops.call.fn, 0))
+        if (!gimple_call_same_target_p (expr0->ops.call.fn_from,
+                                        expr1->ops.call.fn_from))
           return false;
 
         if (! expr0->ops.call.pure)
@@ -502,9 +502,15 @@
       {
         size_t i;
         enum tree_code code = CALL_EXPR;
+        gimple fn_from;
 
         val = iterative_hash_object (code, val);
-        val = iterative_hash_expr (expr->ops.call.fn, val);
+        fn_from = expr->ops.call.fn_from;
+        if (gimple_call_internal_p (fn_from))
+          val = iterative_hash_hashval_t
+            ((hashval_t) gimple_call_internal_fn (fn_from), val);
+        else
+          val = iterative_hash_expr (gimple_call_fn (fn_from), val);
         for (i = 0; i < expr->ops.call.nargs; i++)
           val = iterative_hash_expr (expr->ops.call.args[i], val);
       }
@@ -564,8 +570,14 @@
         {
           size_t i;
           size_t nargs = element->expr.ops.call.nargs;
+          gimple fn_from;
 
-          print_generic_expr (stream, element->expr.ops.call.fn, 0);
+          fn_from = element->expr.ops.call.fn_from;
+          if (gimple_call_internal_p (fn_from))
+            fputs (internal_fn_name (gimple_call_internal_fn (fn_from)),
+                   stream);
+          else
+            print_generic_expr (stream, gimple_call_fn (fn_from), 0);
           fprintf (stream, " (");
           for (i = 0; i < nargs; i++)
             {
--- a/src/gcc/tree-ssa-loop-im.c
+++ b/src/gcc/tree-ssa-loop-im.c
@@ -1834,33 +1834,6 @@
   create_vop_ref_mapping ();
 }
 
-/* Returns true if a region of size SIZE1 at position 0 and a region of
-   size SIZE2 at position DIFF cannot overlap.  */
-
-static bool
-cannot_overlap_p (aff_tree *diff, double_int size1, double_int size2)
-{
-  double_int d, bound;
-
-  /* Unless the difference is a constant, we fail.  */
-  if (diff->n != 0)
-    return false;
-
-  d = diff->offset;
-  if (double_int_negative_p (d))
-    {
-      /* The second object is before the first one, we succeed if the last
-	 element of the second object is before the start of the first one.  */
-      bound = double_int_add (d, double_int_add (size2, double_int_minus_one));
-      return double_int_negative_p (bound);
-    }
-  else
-    {
-      /* We succeed if the second object starts after the first one ends.  */
-      return double_int_scmp (size1, d) <= 0;
-    }
-}
-
 /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
    tree_to_aff_combination_expand.  */
 
@@ -1889,7 +1862,7 @@
   aff_combination_scale (&off1, double_int_minus_one);
   aff_combination_add (&off2, &off1);
 
-  if (cannot_overlap_p (&off2, size1, size2))
+  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
     return false;
 
   return true;
--- a/src/gcc/tree-ssa-math-opts.c
+++ b/src/gcc/tree-ssa-math-opts.c
@@ -1266,39 +1266,67 @@
  }
 };
 
-/* Return true if RHS is a suitable operand for a widening multiplication.
+/* Build a gimple assignment to cast VAL to TARGET.  Insert the statement
+   prior to GSI's current position, and return the fresh SSA name.  */
+
+static tree
+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
+		       tree target, tree val)
+{
+  tree result = make_ssa_name (target, NULL);
+  gimple stmt = gimple_build_assign_with_ops (CONVERT_EXPR, result, val, NULL);
+  gimple_set_location (stmt, loc);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return result;
+}
+
+/* Return true if RHS is a suitable operand for a widening multiplication,
+   assuming a target type of TYPE.
    There are two cases:
 
-     - RHS makes some value twice as wide.  Store that value in *NEW_RHS_OUT
-       if so, and store its type in *TYPE_OUT.
+     - RHS makes some value at least twice as wide.  Store that value
+       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
 
      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
        but leave *TYPE_OUT untouched.  */
 
 static bool
-is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
+is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
+			tree *new_rhs_out)
 {
   gimple stmt;
-  tree type, type1, rhs1;
+  tree type1, rhs1;
   enum tree_code rhs_code;
 
   if (TREE_CODE (rhs) == SSA_NAME)
     {
-      type = TREE_TYPE (rhs);
       stmt = SSA_NAME_DEF_STMT (rhs);
-      if (!is_gimple_assign (stmt))
-	return false;
+      if (is_gimple_assign (stmt))
+	{
+	  rhs_code = gimple_assign_rhs_code (stmt);
+	  if (TREE_CODE (type) == INTEGER_TYPE
+	      ? !CONVERT_EXPR_CODE_P (rhs_code)
+	      : rhs_code != FIXED_CONVERT_EXPR)
+	    rhs1 = rhs;
+	  else
+	    {
+	      rhs1 = gimple_assign_rhs1 (stmt);
 
-      rhs_code = gimple_assign_rhs_code (stmt);
-      if (TREE_CODE (type) == INTEGER_TYPE
-	  ? !CONVERT_EXPR_CODE_P (rhs_code)
-	  : rhs_code != FIXED_CONVERT_EXPR)
-	return false;
+	      if (TREE_CODE (rhs1) == INTEGER_CST)
+		{
+		  *new_rhs_out = rhs1;
+		  *type_out = NULL;
+		  return true;
+		}
+	    }
+	}
+      else
+	rhs1 = rhs;
 
-      rhs1 = gimple_assign_rhs1 (stmt);
       type1 = TREE_TYPE (rhs1);
+
       if (TREE_CODE (type1) != TREE_CODE (type)
-	  || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
 	return false;
 
       *new_rhs_out = rhs1;
@@ -1316,28 +1344,29 @@
   return false;
 }
 
-/* Return true if STMT performs a widening multiplication.  If so,
-   store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT
-   respectively.  Also fill *RHS1_OUT and *RHS2_OUT such that converting
-   those operands to types *TYPE1_OUT and *TYPE2_OUT would give the
-   operands of the multiplication.  */
+/* Return true if STMT performs a widening multiplication, assuming the
+   output type is TYPE.  If so, store the unwidened types of the operands
+   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
+   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
+   and *TYPE2_OUT would give the operands of the multiplication.  */
 
 static bool
 is_widening_mult_p (gimple stmt,
 		    tree *type1_out, tree *rhs1_out,
 		    tree *type2_out, tree *rhs2_out)
 {
-  tree type;
+  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
 
-  type = TREE_TYPE (gimple_assign_lhs (stmt));
   if (TREE_CODE (type) != INTEGER_TYPE
       && TREE_CODE (type) != FIXED_POINT_TYPE)
     return false;
 
-  if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out))
+  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
+			       rhs1_out))
     return false;
 
-  if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out))
+  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
+			       rhs2_out))
     return false;
 
   if (*type1_out == NULL)
@@ -1354,6 +1383,18 @@
       *type2_out = *type1_out;
     }
 
+  /* Ensure that the larger of the two operands comes first. */
+  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
+    {
+      tree tmp;
+      tmp = *type1_out;
+      *type1_out = *type2_out;
+      *type2_out = tmp;
+      tmp = *rhs1_out;
+      *rhs1_out = *rhs2_out;
+      *rhs2_out = tmp;
+    }
+
   return true;
 }
 
@@ -1362,10 +1403,15 @@
    value is true iff we converted the statement.  */
 
 static bool
-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
-  tree lhs, rhs1, rhs2, type, type1, type2;
+  tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL;
   enum insn_code handler;
+  enum machine_mode to_mode, from_mode, actual_mode;
+  optab op;
+  int actual_precision;
+  location_t loc = gimple_location (stmt);
+  bool from_unsigned1, from_unsigned2;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -1375,18 +1421,82 @@
   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
     return false;
 
-  if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2))
-    handler = optab_handler (umul_widen_optab, TYPE_MODE (type));
-  else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2))
-    handler = optab_handler (smul_widen_optab, TYPE_MODE (type));
+  to_mode = TYPE_MODE (type);
+  from_mode = TYPE_MODE (type1);
+  from_unsigned1 = TYPE_UNSIGNED (type1);
+  from_unsigned2 = TYPE_UNSIGNED (type2);
+
+  if (from_unsigned1 && from_unsigned2)
+    op = umul_widen_optab;
+  else if (!from_unsigned1 && !from_unsigned2)
+    op = smul_widen_optab;
   else
-    handler = optab_handler (usmul_widen_optab, TYPE_MODE (type));
+    op = usmul_widen_optab;
+
+  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
+						  0, &actual_mode);
 
   if (handler == CODE_FOR_nothing)
-    return false;
+    {
+      if (op != smul_widen_optab)
+	{
+	  /* We can use a signed multiply with unsigned types as long as
+	     there is a wider mode to use, or it is the smaller of the two
+	     types that is unsigned.  Note that type1 >= type2, always.  */
+	  if ((TYPE_UNSIGNED (type1)
+	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
+	      || (TYPE_UNSIGNED (type2)
+		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
+	    {
+	      from_mode = GET_MODE_WIDER_MODE (from_mode);
+	      if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
+		return false;
+	    }
+
+	  op = smul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+							  from_mode, 0,
+							  &actual_mode);
+
+	  if (handler == CODE_FOR_nothing)
+	    return false;
 
-  gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
-  gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
+	  from_unsigned1 = from_unsigned2 = false;
+	}
+      else
+	return false;
+    }
+
+  /* Ensure that the inputs to the handler are in the correct precison
+     for the opcode.  This will be the full mode size.  */
+  actual_precision = GET_MODE_PRECISION (actual_mode);
+  if (actual_precision != TYPE_PRECISION (type1)
+      || from_unsigned1 != TYPE_UNSIGNED (type1))
+    {
+      tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, from_unsigned1),
+			    NULL);
+      rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
+    }
+  if (actual_precision != TYPE_PRECISION (type2)
+      || from_unsigned2 != TYPE_UNSIGNED (type2))
+    {
+      /* Reuse the same type info, if possible.  */
+      if (!tmp || from_unsigned1 != from_unsigned2)
+	tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, from_unsigned2),
+			      NULL);
+      rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
+    }
+
+  /* Handle constants.  */
+  if (TREE_CODE (rhs1) == INTEGER_CST)
+    rhs1 = fold_convert (type1, rhs1);
+  if (TREE_CODE (rhs2) == INTEGER_CST)
+    rhs2 = fold_convert (type2, rhs2);
+
+  gimple_assign_set_rhs1 (stmt, rhs1);
+  gimple_assign_set_rhs2 (stmt, rhs2);
   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
   update_stmt (stmt);
   return true;
@@ -1403,11 +1513,17 @@
 			    enum tree_code code)
 {
   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
-  tree type, type1, type2;
+  gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
+  tree type, type1, type2, optype, tmp = NULL;
   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
   optab this_optab;
   enum tree_code wmult_code;
+  enum insn_code handler;
+  enum machine_mode to_mode, from_mode, actual_mode;
+  location_t loc = gimple_location (stmt);
+  int actual_precision;
+  bool from_unsigned1, from_unsigned2;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -1429,8 +1545,6 @@
       if (is_gimple_assign (rhs1_stmt))
 	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
     }
-  else
-    return false;
 
   if (TREE_CODE (rhs2) == SSA_NAME)
     {
@@ -1438,57 +1552,160 @@
       if (is_gimple_assign (rhs2_stmt))
 	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
     }
-  else
-    return false;
 
-  if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
+  /* Allow for one conversion statement between the multiply
+     and addition/subtraction statement.  If there are more than
+     one conversions then we assume they would invalidate this
+     transformation.  If that's not the case then they should have
+     been folded before now.  */
+  if (CONVERT_EXPR_CODE_P (rhs1_code))
+    {
+      conv1_stmt = rhs1_stmt;
+      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
+      if (TREE_CODE (rhs1) == SSA_NAME)
+	{
+	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+	  if (is_gimple_assign (rhs1_stmt))
+	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
+	}
+      else
+	return false;
+    }
+  if (CONVERT_EXPR_CODE_P (rhs2_code))
+    {
+      conv2_stmt = rhs2_stmt;
+      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
+      if (TREE_CODE (rhs2) == SSA_NAME)
+	{
+	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+	  if (is_gimple_assign (rhs2_stmt))
+	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
+	}
+      else
+	return false;
+    }
+
+  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
+     is_widening_mult_p, but we still need the rhs returns.
+
+     It might also appear that it would be sufficient to use the existing
+     operands of the widening multiply, but that would limit the choice of
+     multiply-and-accumulate instructions.  */
+  if (code == PLUS_EXPR
+      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
     {
       if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs2;
+      conv_stmt = conv1_stmt;
     }
-  else if (rhs2_code == MULT_EXPR)
+  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
     {
       if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs1;
-    }
-  else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs2;
-    }
-  else if (rhs2_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs1;
+      conv_stmt = conv2_stmt;
     }
   else
     return false;
 
-  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
-    return false;
+  to_mode = TYPE_MODE (type);
+  from_mode = TYPE_MODE (type1);
+  from_unsigned1 = TYPE_UNSIGNED (type1);
+  from_unsigned2 = TYPE_UNSIGNED (type2);
+
+  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
+  if (from_unsigned1 != from_unsigned2)
+    {
+      /* We can use a signed multiply with unsigned types as long as
+	 there is a wider mode to use, or it is the smaller of the two
+	 types that is unsigned.  Note that type1 >= type2, always.  */
+      if ((from_unsigned1
+	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
+	  || (from_unsigned2
+	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
+	{
+	  from_mode = GET_MODE_WIDER_MODE (from_mode);
+	  if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
+	    return false;
+	}
+
+      from_unsigned1 = from_unsigned2 = false;
+    }
+
+  /* If there was a conversion between the multiply and addition
+     then we need to make sure it fits a multiply-and-accumulate.
+     The should be a single mode change which does not change the
+     value.  */
+  if (conv_stmt)
+    {
+      /* We use the original, unmodified data types for this.  */
+      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
+      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
+      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
+      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
+
+      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
+	{
+	  /* Conversion is a truncate.  */
+	  if (TYPE_PRECISION (to_type) < data_size)
+	    return false;
+	}
+      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
+	{
+	  /* Conversion is an extend.  Check it's the right sort.  */
+	  if (TYPE_UNSIGNED (from_type) != is_unsigned
+	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
+	    return false;
+	}
+      /* else convert is a no-op for our purposes.  */
+    }
 
   /* Verify that the machine can perform a widening multiply
      accumulate in this mode/signedness combination, otherwise
      this transformation is likely to pessimize code.  */
-  this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
-  if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
+  optype = build_nonstandard_integer_type (from_mode, from_unsigned1);
+  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
+  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
+						  from_mode, 0, &actual_mode);
+
+  if (handler == CODE_FOR_nothing)
     return false;
 
-  /* ??? May need some type verification here?  */
+  /* Ensure that the inputs to the handler are in the correct precison
+     for the opcode.  This will be the full mode size.  */
+  actual_precision = GET_MODE_PRECISION (actual_mode);
+  if (actual_precision != TYPE_PRECISION (type1)
+      || from_unsigned1 != TYPE_UNSIGNED (type1))
+    {
+      tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, from_unsigned1),
+			    NULL);
+      mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
+    }
+  if (actual_precision != TYPE_PRECISION (type2)
+      || from_unsigned2 != TYPE_UNSIGNED (type2))
+    {
+      if (!tmp || from_unsigned1 != from_unsigned2)
+	tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, from_unsigned2),
+			      NULL);
+      mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
+    }
+
+  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
+    add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL),
+				     add_rhs);
+
+  /* Handle constants.  */
+  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
+    mult_rhs1 = fold_convert (type1, mult_rhs1);
+  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
+    mult_rhs2 = fold_convert (type2, mult_rhs2);
 
-  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
-				    fold_convert (type1, mult_rhs1),
-				    fold_convert (type2, mult_rhs2),
+  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
 				    add_rhs);
   update_stmt (gsi_stmt (*gsi));
   return true;
@@ -1696,7 +1913,7 @@
 	      switch (code)
 		{
 		case MULT_EXPR:
-		  if (!convert_mult_to_widen (stmt)
+		  if (!convert_mult_to_widen (stmt, &gsi)
 		      && convert_mult_to_fma (stmt,
 					      gimple_assign_rhs1 (stmt),
 					      gimple_assign_rhs2 (stmt)))
--- a/src/gcc/tree-ssa-phiopt.c
+++ b/src/gcc/tree-ssa-phiopt.c
@@ -34,6 +34,8 @@
 #include "langhooks.h"
 #include "pointer-set.h"
 #include "domwalk.h"
+#include "cfgloop.h"
+#include "tree-data-ref.h"
 
 static unsigned int tree_ssa_phiopt (void);
 static unsigned int tree_ssa_phiopt_worker (bool);
@@ -1303,35 +1305,18 @@
   return true;
 }
 
-/* Do the main work of conditional store replacement.  We already know
-   that the recognized pattern looks like so:
-
-   split:
-     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
-   THEN_BB:
-     X = Y;
-     goto JOIN_BB;
-   ELSE_BB:
-     X = Z;
-     fallthrough (edge E0)
-   JOIN_BB:
-     some more
-
-   We check that THEN_BB and ELSE_BB contain only one store
-   that the stores have a "simple" RHS.  */
+/* Do the main work of conditional store replacement.  */
 
 static bool
-cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
-				basic_block join_bb)
+cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
+				  basic_block join_bb, gimple then_assign,
+				  gimple else_assign)
 {
-  gimple then_assign = last_and_only_stmt (then_bb);
-  gimple else_assign = last_and_only_stmt (else_bb);
   tree lhs_base, lhs, then_rhs, else_rhs;
   source_location then_locus, else_locus;
   gimple_stmt_iterator gsi;
   gimple newphi, new_stmt;
 
-  /* Check if then_bb and else_bb contain only one store each.  */
   if (then_assign == NULL
       || !gimple_assign_single_p (then_assign)
       || gimple_has_volatile_ops (then_assign)
@@ -1398,6 +1383,190 @@
   return true;
 }
 
+/* Conditional store replacement.  We already know
+   that the recognized pattern looks like so:
+
+   split:
+     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
+   THEN_BB:
+     ...
+     X = Y;
+     ...
+     goto JOIN_BB;
+   ELSE_BB:
+     ...
+     X = Z;
+     ...
+     fallthrough (edge E0)
+   JOIN_BB:
+     some more
+
+   We check that it is safe to sink the store to JOIN_BB by verifying that
+   there are no read-after-write or write-after-write dependencies in
+   THEN_BB and ELSE_BB.  */
+
+static bool
+cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
+                                basic_block join_bb)
+{
+  gimple then_assign = last_and_only_stmt (then_bb);
+  gimple else_assign = last_and_only_stmt (else_bb);
+  VEC (data_reference_p, heap) *then_datarefs, *else_datarefs;
+  VEC (ddr_p, heap) *then_ddrs, *else_ddrs;
+  gimple then_store, else_store;
+  bool found, ok = false, res;
+  struct data_dependence_relation *ddr;
+  data_reference_p then_dr, else_dr;
+  int i, j;
+  tree then_lhs, else_lhs;
+  VEC (gimple, heap) *then_stores, *else_stores;
+  basic_block blocks[3];
+
+  if (MAX_STORES_TO_SINK == 0)
+    return false;
+
+  /* Handle the case with single statement in THEN_BB and ELSE_BB.  */
+  if (then_assign && else_assign)
+    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
+                                             then_assign, else_assign);
+
+  /* Find data references.  */
+  then_datarefs = VEC_alloc (data_reference_p, heap, 1);
+  else_datarefs = VEC_alloc (data_reference_p, heap, 1);
+  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
+        == chrec_dont_know)
+      || !VEC_length (data_reference_p, then_datarefs)
+      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
+        == chrec_dont_know)
+      || !VEC_length (data_reference_p, else_datarefs))
+    {
+      free_data_refs (then_datarefs);
+      free_data_refs (else_datarefs);
+      return false;
+    }
+
+  /* Find pairs of stores with equal LHS.  */
+  then_stores = VEC_alloc (gimple, heap, 1);
+  else_stores = VEC_alloc (gimple, heap, 1);
+  FOR_EACH_VEC_ELT (data_reference_p, then_datarefs, i, then_dr)
+    {
+      if (DR_IS_READ (then_dr))
+        continue;
+
+      then_store = DR_STMT (then_dr);
+      then_lhs = gimple_assign_lhs (then_store);
+      found = false;
+
+      FOR_EACH_VEC_ELT (data_reference_p, else_datarefs, j, else_dr)
+        {
+          if (DR_IS_READ (else_dr))
+            continue;
+
+          else_store = DR_STMT (else_dr);
+          else_lhs = gimple_assign_lhs (else_store);
+
+          if (operand_equal_p (then_lhs, else_lhs, 0))
+            {
+              found = true;
+              break;
+            }
+        }
+
+      if (!found)
+        continue;
+
+      VEC_safe_push (gimple, heap, then_stores, then_store);
+      VEC_safe_push (gimple, heap, else_stores, else_store);
+    }
+
+  /* No pairs of stores found.  */
+  if (!VEC_length (gimple, then_stores)
+      || VEC_length (gimple, then_stores) > (unsigned) MAX_STORES_TO_SINK)
+    {
+      free_data_refs (then_datarefs);
+      free_data_refs (else_datarefs);
+      VEC_free (gimple, heap, then_stores);
+      VEC_free (gimple, heap, else_stores);
+      return false;
+    }
+
+  /* Compute and check data dependencies in both basic blocks.  */
+  then_ddrs = VEC_alloc (ddr_p, heap, 1);
+  else_ddrs = VEC_alloc (ddr_p, heap, 1);
+  compute_all_dependences (then_datarefs, &then_ddrs, NULL, false);
+  compute_all_dependences (else_datarefs, &else_ddrs, NULL, false);
+  blocks[0] = then_bb;
+  blocks[1] = else_bb;
+  blocks[2] = join_bb;
+  renumber_gimple_stmt_uids_in_blocks (blocks, 3);
+
+  /* Check that there are no read-after-write or write-after-write dependencies
+     in THEN_BB.  */
+  FOR_EACH_VEC_ELT (ddr_p, then_ddrs, i, ddr)
+    {
+      struct data_reference *dra = DDR_A (ddr);
+      struct data_reference *drb = DDR_B (ddr);
+
+      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
+          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
+               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
+              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
+                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
+              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
+        {
+          free_dependence_relations (then_ddrs);
+          free_dependence_relations (else_ddrs);
+          free_data_refs (then_datarefs);
+          free_data_refs (else_datarefs);
+          VEC_free (gimple, heap, then_stores);
+          VEC_free (gimple, heap, else_stores);
+          return false;
+        }
+    }
+
+  /* Check that there are no read-after-write or write-after-write dependencies
+     in ELSE_BB.  */
+  FOR_EACH_VEC_ELT (ddr_p, else_ddrs, i, ddr)
+    {
+      struct data_reference *dra = DDR_A (ddr);
+      struct data_reference *drb = DDR_B (ddr);
+
+      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
+          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
+               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
+              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
+                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
+              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
+        {
+          free_dependence_relations (then_ddrs);
+          free_dependence_relations (else_ddrs);
+          free_data_refs (then_datarefs);
+          free_data_refs (else_datarefs);
+          VEC_free (gimple, heap, then_stores);
+          VEC_free (gimple, heap, else_stores);
+          return false;
+        }
+    }
+
+  /* Sink stores with same LHS.  */
+  FOR_EACH_VEC_ELT (gimple, then_stores, i, then_store)
+    {
+      else_store = VEC_index (gimple, else_stores, i);
+      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
+                                              then_store, else_store);
+      ok = ok || res;
+    }
+
+  free_dependence_relations (then_ddrs);
+  free_dependence_relations (else_ddrs);
+  free_data_refs (then_datarefs);
+  free_data_refs (else_datarefs);
+  VEC_free (gimple, heap, then_stores);
+  VEC_free (gimple, heap, else_stores);
+
+  return ok;
+}
+
 /* Always do these optimizations if we have SSA
    trees to work on.  */
 static bool
--- a/src/gcc/tree-ssa-pre.c
+++ b/src/gcc/tree-ssa-pre.c
@@ -2640,11 +2640,13 @@
 }
 
 /* Return true if we can value number the call in STMT.  This is true
-   if we have a pure or constant call.  */
+   if we have a pure or constant call to a real function.  */
 
 static bool
 can_value_number_call (gimple stmt)
 {
+  if (gimple_call_internal_p (stmt))
+    return false;
   if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST))
     return true;
   return false;
@@ -4173,6 +4175,7 @@
   gimple_stmt_iterator gsi;
   gimple stmt;
   unsigned i;
+  tree fn;
 
   FOR_EACH_BB (b)
     {
@@ -4364,9 +4367,10 @@
 	  /* Visit indirect calls and turn them into direct calls if
 	     possible.  */
 	  if (is_gimple_call (stmt)
-	      && TREE_CODE (gimple_call_fn (stmt)) == SSA_NAME)
+	      && (fn = gimple_call_fn (stmt))
+	      && TREE_CODE (fn) == SSA_NAME)
 	    {
-	      tree fn = VN_INFO (gimple_call_fn (stmt))->valnum;
+	      fn = VN_INFO (fn)->valnum;
 	      if (TREE_CODE (fn) == ADDR_EXPR
 		  && TREE_CODE (TREE_OPERAND (fn, 0)) == FUNCTION_DECL)
 		{
--- a/src/gcc/tree-ssa-sccvn.c
+++ b/src/gcc/tree-ssa-sccvn.c
@@ -2986,7 +2986,8 @@
 	  /* ???  We should handle stores from calls.  */
 	  else if (TREE_CODE (lhs) == SSA_NAME)
 	    {
-	      if (gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST))
+	      if (!gimple_call_internal_p (stmt)
+		  && gimple_call_flags (stmt) & (ECF_PURE | ECF_CONST))
 		changed = visit_reference_op_call (lhs, stmt);
 	      else
 		changed = defs_to_varying (stmt);
--- a/src/gcc/tree-ssa-structalias.c
+++ b/src/gcc/tree-ssa-structalias.c
@@ -4330,6 +4330,7 @@
 	    /* Fallthru to general call handling.  */;
 	  }
       if (!in_ipa_mode
+	  || gimple_call_internal_p (t)
 	  || (fndecl
 	      && (!(fi = lookup_vi_for_tree (fndecl))
 		  || !fi->is_fn_info)))
--- a/src/gcc/tree-vect-data-refs.c
+++ b/src/gcc/tree-vect-data-refs.c
@@ -43,6 +43,45 @@
 #include "expr.h"
 #include "optabs.h"
 
+/* Return true if load- or store-lanes optab OPTAB is implemented for
+   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
+
+static bool
+vect_lanes_optab_supported_p (const char *name, convert_optab optab,
+			      tree vectype, unsigned HOST_WIDE_INT count)
+{
+  enum machine_mode mode, array_mode;
+  bool limit_p;
+
+  mode = TYPE_MODE (vectype);
+  limit_p = !targetm.array_mode_supported_p (mode, count);
+  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
+			      MODE_INT, limit_p);
+
+  if (array_mode == BLKmode)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
+		 GET_MODE_NAME (mode), count);
+      return false;
+    }
+
+  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "cannot use %s<%s><%s>",
+		 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+      return false;
+    }
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "can use %s<%s><%s>",
+	     name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+
+  return true;
+}
+
+
 /* Return the smallest scalar part of STMT.
    This is used to determine the vectype of the stmt.  We generally set the
    vectype according to the type of the result (lhs).  For stmts whose
@@ -72,6 +111,7 @@
   if (is_gimple_assign (stmt)
       && (gimple_assign_cast_p (stmt)
           || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
+          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
           || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
     {
       tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
@@ -289,39 +329,6 @@
     }
 }
 
-
-/* Function vect_equal_offsets.
-
-   Check if OFFSET1 and OFFSET2 are identical expressions.  */
-
-static bool
-vect_equal_offsets (tree offset1, tree offset2)
-{
-  bool res;
-
-  STRIP_NOPS (offset1);
-  STRIP_NOPS (offset2);
-
-  if (offset1 == offset2)
-    return true;
-
-  if (TREE_CODE (offset1) != TREE_CODE (offset2)
-      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
-    return false;
-
-  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
-			    TREE_OPERAND (offset2, 0));
-
-  if (!res || !BINARY_CLASS_P (offset1))
-    return res;
-
-  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
-			    TREE_OPERAND (offset2, 1));
-
-  return res;
-}
-
-
 /* Check dependence between DRA and DRB for basic block vectorization.
    If the accesses share same bases and offsets, we can compare their initial
    constant offsets to decide whether they differ or not.  In case of a read-
@@ -347,12 +354,8 @@
 
   /* Check that the data-refs have same bases and offsets.  If not, we can't
      determine if they are dependent.  */
-  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
-       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
-           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
-           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
-           != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
-      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
+  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
+      || !dr_equal_offsets_p (dra, drb))
     return true;
 
   /* Check the types.  */
@@ -397,12 +400,8 @@
 
   /* Check that the data-refs have same first location (except init) and they
      are both either store or load (not load and store).  */
-  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
-       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
-	   || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
-	   || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
-	   != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
-      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
+  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
+      || !dr_equal_offsets_p (dra, drb)
       || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
       || DR_IS_READ (dra) != DR_IS_READ (drb))
     return false;
@@ -609,6 +608,11 @@
       if (vect_check_interleaving (dra, drb))
          return false;
 
+      /* Read-read is OK (we need this check here, after checking for
+         interleaving).  */
+      if (DR_IS_READ (dra) && DR_IS_READ (drb))
+        return false;
+
       if (vect_print_dump_info (REPORT_DR_DETAILS))
         {
           fprintf (vect_dump, "can't determine dependence between ");
@@ -841,6 +845,24 @@
 	}
     }
 
+  /* Similarly, if we're doing basic-block vectorization, we can only use
+     base and misalignment information relative to an innermost loop if the
+     misalignment stays the same throughout the execution of the loop.
+     As above, this is the case if the stride of the dataref evenly divides
+     by the vector size.  */
+  if (!loop)
+    {
+      tree step = DR_STEP (dr);
+      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+
+      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+	{
+	  if (vect_print_dump_info (REPORT_ALIGNMENT))
+	    fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
+	  misalign = NULL_TREE;
+	}
+    }
+
   base = build_fold_indirect_ref (base_addr);
   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
 
@@ -1053,6 +1075,9 @@
       gimple stmt = DR_STMT (dr);
       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
+      if (!STMT_VINFO_RELEVANT_P (stmt_info))
+	continue;
+
       /* For interleaving, only the alignment of the first access matters. 
          Skip statements marked as not vectorizable.  */
       if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
@@ -1171,17 +1196,11 @@
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   int ncopies = vf / nunits;
-  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
 
-  if (!supportable_dr_alignment)
-    *inside_cost = VECT_MAX_COST;
+  if (DR_IS_READ (dr))
+    vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
   else
-    {
-      if (DR_IS_READ (dr))
-        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
-      else
-        vect_get_store_cost (dr, ncopies, inside_cost);
-    }
+    vect_get_store_cost (dr, ncopies, inside_cost);
 
   if (vect_print_dump_info (REPORT_COST))
     fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
@@ -1250,7 +1269,9 @@
   vect_peel_info elem = (vect_peel_info) *slot;
   vect_peel_extended_info max = (vect_peel_extended_info) data;
 
-  if (elem->count > max->peel_info.count)
+  if (elem->count > max->peel_info.count
+      || (elem->count == max->peel_info.count
+          && max->peel_info.npeel > elem->npeel))
     {
       max->peel_info.npeel = elem->npeel;
       max->peel_info.count = elem->count;
@@ -1493,7 +1514,7 @@
       stmt = DR_STMT (dr);
       stmt_info = vinfo_for_stmt (stmt);
 
-      if (!STMT_VINFO_RELEVANT (stmt_info))
+      if (!STMT_VINFO_RELEVANT_P (stmt_info))
 	continue;
 
       /* For interleaving, only the alignment of the first access
@@ -2256,19 +2277,6 @@
           return false;
         }
 
-      /* FORNOW: we handle only interleaving that is a power of 2.
-         We don't fail here if it may be still possible to vectorize the
-         group using SLP.  If not, the size of the group will be checked in
-         vect_analyze_operations, and the vectorization will fail.  */
-      if (exact_log2 (stride) == -1)
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "interleaving is not a power of 2");
-
-	  if (slp_impossible)
-	    return false;
-	}
-
       if (stride == 0)
         stride = count;
 
@@ -2993,31 +3001,33 @@
 
 /* Function vect_create_data_ref_ptr.
 
-   Create a new pointer to vector type (vp), that points to the first location
-   accessed in the loop by STMT, along with the def-use update chain to
-   appropriately advance the pointer through the loop iterations. Also set
-   aliasing information for the pointer.  This vector pointer is used by the
-   callers to this function to create a memory reference expression for vector
-   load/store access.
+   Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
+   location accessed in the loop by STMT, along with the def-use update
+   chain to appropriately advance the pointer through the loop iterations.
+   Also set aliasing information for the pointer.  This pointer is used by
+   the callers to this function to create a memory reference expression for
+   vector load/store access.
 
    Input:
    1. STMT: a stmt that references memory. Expected to be of the form
          GIMPLE_ASSIGN <name, data-ref> or
 	 GIMPLE_ASSIGN <data-ref, name>.
-   2. AT_LOOP: the loop where the vector memref is to be created.
-   3. OFFSET (optional): an offset to be added to the initial address accessed
+   2. AGGR_TYPE: the type of the reference, which should be either a vector
+        or an array.
+   3. AT_LOOP: the loop where the vector memref is to be created.
+   4. OFFSET (optional): an offset to be added to the initial address accessed
         by the data-ref in STMT.
-   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
+   5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
         pointing to the initial address.
-   5. TYPE: if not NULL indicates the required type of the data-ref.
+   6. TYPE: if not NULL indicates the required type of the data-ref.
 
    Output:
    1. Declare a new ptr to vector_type, and have it point to the base of the
       data reference (initial addressed accessed by the data reference).
       For example, for vector of type V8HI, the following code is generated:
 
-      v8hi *vp;
-      vp = (v8hi *)initial_address;
+      v8hi *ap;
+      ap = (v8hi *)initial_address;
 
       if OFFSET is not supplied:
          initial_address = &a[init];
@@ -3037,7 +3047,7 @@
    4. Return the pointer.  */
 
 tree
-vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
+vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
 			  tree offset, tree *initial_address, gimple *ptr_incr,
 			  bool only_init, bool *inv_p)
 {
@@ -3047,17 +3057,16 @@
   struct loop *loop = NULL;
   bool nested_in_vect_loop = false;
   struct loop *containing_loop = NULL;
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree vect_ptr_type;
-  tree vect_ptr;
+  tree aggr_ptr_type;
+  tree aggr_ptr;
   tree new_temp;
   gimple vec_stmt;
   gimple_seq new_stmt_list = NULL;
   edge pe = NULL;
   basic_block new_bb;
-  tree vect_ptr_init;
+  tree aggr_ptr_init;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
-  tree vptr;
+  tree aptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
   bool negative;
@@ -3068,6 +3077,9 @@
   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
   tree base;
 
+  gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
+	      || TREE_CODE (aggr_type) == VECTOR_TYPE);
+
   if (loop_vinfo)
     {
       loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -3102,8 +3114,9 @@
   if (vect_print_dump_info (REPORT_DETAILS))
     {
       tree data_ref_base = base_name;
-      fprintf (vect_dump, "create vector-pointer variable to type: ");
-      print_generic_expr (vect_dump, vectype, TDF_SLIM);
+      fprintf (vect_dump, "create %s-pointer variable to type: ",
+	       tree_code_name[(int) TREE_CODE (aggr_type)]);
+      print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
       if (TREE_CODE (data_ref_base) == VAR_DECL
           || TREE_CODE (data_ref_base) == ARRAY_REF)
         fprintf (vect_dump, "  vectorizing an array ref: ");
@@ -3114,27 +3127,28 @@
       print_generic_expr (vect_dump, base_name, TDF_SLIM);
     }
 
-  /* (1) Create the new vector-pointer variable.  */
-  vect_ptr_type = build_pointer_type (vectype);
+  /* (1) Create the new aggregate-pointer variable.  */
+  aggr_ptr_type = build_pointer_type (aggr_type);
   base = get_base_address (DR_REF (dr));
   if (base
       && TREE_CODE (base) == MEM_REF)
-    vect_ptr_type
-      = build_qualified_type (vect_ptr_type,
+    aggr_ptr_type
+      = build_qualified_type (aggr_ptr_type,
 			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
-  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
+  aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
                                     get_name (base_name));
 
-  /* Vector types inherit the alias set of their component type by default so
-     we need to use a ref-all pointer if the data reference does not conflict
-     with the created vector data reference because it is not addressable.  */
-  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
+  /* Vector and array types inherit the alias set of their component
+     type by default so we need to use a ref-all pointer if the data
+     reference does not conflict with the created aggregated data
+     reference because it is not addressable.  */
+  if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
 			      get_alias_set (DR_REF (dr))))
     {
-      vect_ptr_type
-	= build_pointer_type_for_mode (vectype,
-				       TYPE_MODE (vect_ptr_type), true);
-      vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
+      aggr_ptr_type
+	= build_pointer_type_for_mode (aggr_type,
+				       TYPE_MODE (aggr_ptr_type), true);
+      aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
 					get_name (base_name));
     }
 
@@ -3145,14 +3159,14 @@
       do
 	{
 	  tree lhs = gimple_assign_lhs (orig_stmt);
-	  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
+	  if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
 				      get_alias_set (lhs)))
 	    {
-	      vect_ptr_type
-		= build_pointer_type_for_mode (vectype,
-					       TYPE_MODE (vect_ptr_type), true);
-	      vect_ptr
-		= vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
+	      aggr_ptr_type
+		= build_pointer_type_for_mode (aggr_type,
+					       TYPE_MODE (aggr_ptr_type), true);
+	      aggr_ptr
+		= vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
 					 get_name (base_name));
 	      break;
 	    }
@@ -3162,7 +3176,7 @@
       while (orig_stmt);
     }
 
-  add_referenced_var (vect_ptr);
+  add_referenced_var (aggr_ptr);
 
   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
@@ -3195,8 +3209,8 @@
 		vp2 = vp1 + step
 		if () goto LOOP   */
 
-  /* (2) Calculate the initial address the vector-pointer, and set
-         the vector-pointer to point to it before the loop.  */
+  /* (2) Calculate the initial address of the aggregate-pointer, and set
+     the aggregate-pointer to point to it before the loop.  */
 
   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
 
@@ -3215,17 +3229,17 @@
 
   *initial_address = new_temp;
 
-  /* Create: p = (vectype *) initial_base  */
+  /* Create: p = (aggr_type *) initial_base  */
   if (TREE_CODE (new_temp) != SSA_NAME
-      || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
+      || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
     {
-      vec_stmt = gimple_build_assign (vect_ptr,
-				      fold_convert (vect_ptr_type, new_temp));
-      vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
+      vec_stmt = gimple_build_assign (aggr_ptr,
+				      fold_convert (aggr_ptr_type, new_temp));
+      aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
       /* Copy the points-to information if it exists. */
       if (DR_PTR_INFO (dr))
-	duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
-      gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
+	duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
+      gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
       if (pe)
 	{
 	  new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
@@ -3235,19 +3249,19 @@
 	gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
     }
   else
-    vect_ptr_init = new_temp;
+    aggr_ptr_init = new_temp;
 
-  /* (3) Handle the updating of the vector-pointer inside the loop.
+  /* (3) Handle the updating of the aggregate-pointer inside the loop.
      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
      inner-loop nested in LOOP (during outer-loop vectorization).  */
 
   /* No update in loop is required.  */
   if (only_init && (!loop_vinfo || at_loop == loop))
-    vptr = vect_ptr_init;
+    aptr = aggr_ptr_init;
   else
     {
-      /* The step of the vector pointer is the Vector Size.  */
-      tree step = TYPE_SIZE_UNIT (vectype);
+      /* The step of the aggregate pointer is the type size.  */
+      tree step = TYPE_SIZE_UNIT (aggr_type);
       /* One exception to the above is when the scalar step of the load in
 	 LOOP is zero. In this case the step here is also zero.  */
       if (*inv_p)
@@ -3257,9 +3271,9 @@
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      create_iv (vect_ptr_init,
-		 fold_convert (vect_ptr_type, step),
-		 vect_ptr, loop, &incr_gsi, insert_after,
+      create_iv (aggr_ptr_init,
+		 fold_convert (aggr_ptr_type, step),
+		 aggr_ptr, loop, &incr_gsi, insert_after,
 		 &indx_before_incr, &indx_after_incr);
       incr = gsi_stmt (incr_gsi);
       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
@@ -3273,14 +3287,14 @@
       if (ptr_incr)
 	*ptr_incr = incr;
 
-      vptr = indx_before_incr;
+      aptr = indx_before_incr;
     }
 
   if (!nested_in_vect_loop || only_init)
-    return vptr;
+    return aptr;
 
 
-  /* (4) Handle the updating of the vector-pointer inside the inner-loop
+  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
      nested in LOOP, if exists.  */
 
   gcc_assert (nested_in_vect_loop);
@@ -3288,7 +3302,7 @@
     {
       standard_iv_increment_position (containing_loop, &incr_gsi,
 				      &insert_after);
-      create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
+      create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
 		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
 		 &indx_after_incr);
       incr = gsi_stmt (incr_gsi);
@@ -3425,13 +3439,22 @@
    and FALSE otherwise.  */
 
 bool
-vect_strided_store_supported (tree vectype)
+vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 {
   optab interleave_high_optab, interleave_low_optab;
   enum machine_mode mode;
 
   mode = TYPE_MODE (vectype);
 
+  /* vect_permute_store_chain requires the group size to be a power of two.  */
+  if (exact_log2 (count) == -1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "the size of the group of strided accesses"
+		 " is not a power of 2");
+      return false;
+    }
+
   /* Check that the operation is supported.  */
   interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
 					       vectype, optab_default);
@@ -3456,6 +3479,18 @@
 }
 
 
+/* Return TRUE if vec_store_lanes is available for COUNT vectors of
+   type VECTYPE.  */
+
+bool
+vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  return vect_lanes_optab_supported_p ("vec_store_lanes",
+				       vec_store_lanes_optab,
+				       vectype, count);
+}
+
+
 /* Function vect_permute_store_chain.
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
@@ -3517,7 +3552,7 @@
    I3:  4 12 20 28  5 13 21 30
    I4:  6 14 22 30  7 15 23 31.  */
 
-bool
+void
 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
 			  unsigned int length,
 			  gimple stmt,
@@ -3531,9 +3566,7 @@
   unsigned int j;
   enum tree_code high_code, low_code;
 
-  /* Check that the operation is supported.  */
-  if (!vect_strided_store_supported (vectype))
-    return false;
+  gcc_assert (vect_strided_store_supported (vectype, length));
 
   *result_chain = VEC_copy (tree, heap, dr_chain);
 
@@ -3586,7 +3619,6 @@
 	}
       dr_chain = VEC_copy (tree, heap, *result_chain);
     }
-  return true;
 }
 
 /* Function vect_setup_realignment
@@ -3756,8 +3788,9 @@
 
       gcc_assert (!compute_in_loop);
       vec_dest = vect_create_destination_var (scalar_dest, vectype);
-      ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
-				      &init_addr, &inc, true, &inv_p);
+      ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
+				      NULL_TREE, &init_addr, &inc,
+				      true, &inv_p);
       new_stmt = gimple_build_assign_with_ops
 		   (BIT_AND_EXPR, NULL_TREE, ptr,
 		    build_int_cst (TREE_TYPE (ptr),
@@ -3862,13 +3895,22 @@
    and FALSE otherwise.  */
 
 bool
-vect_strided_load_supported (tree vectype)
+vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
 {
   optab perm_even_optab, perm_odd_optab;
   enum machine_mode mode;
 
   mode = TYPE_MODE (vectype);
 
+  /* vect_permute_load_chain requires the group size to be a power of two.  */
+  if (exact_log2 (count) == -1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "the size of the group of strided accesses"
+		 " is not a power of 2");
+      return false;
+    }
+
   perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
 					 optab_default);
   if (!perm_even_optab)
@@ -3903,6 +3945,16 @@
   return true;
 }
 
+/* Return TRUE if vec_load_lanes is available for COUNT vectors of
+   type VECTYPE.  */
+
+bool
+vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  return vect_lanes_optab_supported_p ("vec_load_lanes",
+				       vec_load_lanes_optab,
+				       vectype, count);
+}
 
 /* Function vect_permute_load_chain.
 
@@ -3980,7 +4032,7 @@
    3rd vec (E2):  2 6 10 14 18 22 26 30
    4th vec (E4):  3 7 11 15 19 23 27 31.  */
 
-bool
+static void
 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
 			 unsigned int length,
 			 gimple stmt,
@@ -3993,9 +4045,7 @@
   int i;
   unsigned int j;
 
-  /* Check that the operation is supported.  */
-  if (!vect_strided_load_supported (vectype))
-    return false;
+  gcc_assert (vect_strided_load_supported (vectype, length));
 
   *result_chain = VEC_copy (tree, heap, dr_chain);
   for (i = 0; i < exact_log2 (length); i++)
@@ -4038,7 +4088,6 @@
 	}
       dr_chain = VEC_copy (tree, heap, *result_chain);
     }
-  return true;
 }
 
 
@@ -4049,24 +4098,32 @@
    the scalar statements.
 */
 
-bool
+void
 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
 			     gimple_stmt_iterator *gsi)
 {
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
-  gimple next_stmt, new_stmt;
   VEC(tree,heap) *result_chain = NULL;
-  unsigned int i, gap_count;
-  tree tmp_data_ref;
 
   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
      vectors, that are ready for vector computation.  */
   result_chain = VEC_alloc (tree, heap, size);
-  /* Permute.  */
-  if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
-    return false;
+  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
+  vect_record_strided_load_vectors (stmt, result_chain);
+  VEC_free (tree, heap, result_chain);
+}
+
+/* RESULT_CHAIN contains the output of a group of strided loads that were
+   generated as part of the vectorization of STMT.  Assign the statement
+   for each vector to the associated scalar statement.  */
+
+void
+vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
+{
+  gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+  gimple next_stmt, new_stmt;
+  unsigned int i, gap_count;
+  tree tmp_data_ref;
 
   /* Put a permuted data-ref in the VECTORIZED_STMT field.
      Since we scan the chain starting from it's first node, their order
@@ -4128,9 +4185,6 @@
 	    break;
         }
     }
-
-  VEC_free (tree, heap, result_chain);
-  return true;
 }
 
 /* Function vect_force_dr_alignment_p.
--- a/src/gcc/tree-vect-generic.c
+++ b/src/gcc/tree-vect-generic.c
@@ -552,7 +552,9 @@
       || code == VEC_UNPACK_LO_EXPR
       || code == VEC_PACK_TRUNC_EXPR
       || code == VEC_PACK_SAT_EXPR
-      || code == VEC_PACK_FIX_TRUNC_EXPR)
+      || code == VEC_PACK_FIX_TRUNC_EXPR
+      || code == VEC_WIDEN_LSHIFT_HI_EXPR
+      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
     type = TREE_TYPE (rhs1);
 
   /* Optabs will try converting a negation into a subtraction, so
--- a/src/gcc/tree-vect-loop.c
+++ b/src/gcc/tree-vect-loop.c
@@ -181,6 +181,8 @@
   stmt_vec_info stmt_info;
   int i;
   HOST_WIDE_INT dummy;
+  gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
+  bool analyze_pattern_stmt = false, pattern_def = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
@@ -241,12 +243,20 @@
 	    }
 	}
 
-      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+      for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
         {
-	  tree vf_vectype;
-	  gimple stmt = gsi_stmt (si);
-	  stmt_info = vinfo_for_stmt (stmt);
+          tree vf_vectype;
 
+          if (analyze_pattern_stmt)
+            {
+              stmt = pattern_stmt;
+              analyze_pattern_stmt = false;
+            }
+          else
+            stmt = gsi_stmt (si);
+
+         stmt_info = vinfo_for_stmt (stmt);
+              
 	  if (vect_print_dump_info (REPORT_DETAILS))
 	    {
 	      fprintf (vect_dump, "==> examining statement: ");
@@ -259,11 +269,57 @@
 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
 	      && !STMT_VINFO_LIVE_P (stmt_info))
 	    {
-	      if (vect_print_dump_info (REPORT_DETAILS))
-	        fprintf (vect_dump, "skip.");
-	      continue;
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+                {
+                  stmt = pattern_stmt;
+                  stmt_info = vinfo_for_stmt (pattern_stmt);
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    {
+                      fprintf (vect_dump, "==> examining pattern statement: ");
+                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                    }
+                }
+             else
+               {
+                 if (vect_print_dump_info (REPORT_DETAILS))
+                   fprintf (vect_dump, "skip.");
+                 gsi_next (&si);
+                 continue;
+               }
 	    }
 
+          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+            analyze_pattern_stmt = true;
+
+          /* If a pattern statement has a def stmt, analyze it too.  */
+          if (is_pattern_stmt_p (stmt_info)
+              && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
+              && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
+                  || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
+            {
+              if (pattern_def)
+                pattern_def = false;
+              else
+                {
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    {
+                      fprintf (vect_dump, "==> examining pattern def stmt: ");
+                      print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                         TDF_SLIM);
+                    }
+
+                  pattern_def = true;
+                  stmt = pattern_def_stmt;
+                  stmt_info = vinfo_for_stmt (stmt);
+                }
+            }
+
 	  if (gimple_get_lhs (stmt) == NULL_TREE)
 	    {
 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
@@ -295,9 +351,7 @@
 	    }
 	  else
 	    {
-	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
-			  && !is_pattern_stmt_p (stmt_info));
-
+	      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 	      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 	      if (vect_print_dump_info (REPORT_DETAILS))
 		{
@@ -369,6 +423,9 @@
 	  if (!vectorization_factor
 	      || (nunits > vectorization_factor))
 	    vectorization_factor = nunits;
+
+          if (!analyze_pattern_stmt && !pattern_def)
+            gsi_next (&si);
         }
     }
 
@@ -817,25 +874,17 @@
 
           if (stmt_info)
             {
-              /* Check if this is a "pattern stmt" (introduced by the
-                 vectorizer during the pattern recognition pass).  */
-              bool remove_stmt_p = false;
-              gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-              if (orig_stmt)
-                {
-                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
-                  if (orig_stmt_info
-                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
-                    remove_stmt_p = true;
-                }
+              /* Check if this statement has a related "pattern stmt"
+                 (introduced by the vectorizer during the pattern recognition
+                 pass).  Free pattern's stmt_vec_info.  */
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)))
+                free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
 
               /* Free stmt_vec_info.  */
               free_stmt_vec_info (stmt);
-
-              /* Remove dead "pattern stmts".  */
-              if (remove_stmt_p)
-                gsi_remove (&si, true);
             }
+
           gsi_next (&si);
         }
     }
@@ -1409,7 +1458,7 @@
 
   vect_analyze_scalar_cycles (loop_vinfo);
 
-  vect_pattern_recog (loop_vinfo);
+  vect_pattern_recog (loop_vinfo, NULL);
 
   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
 
@@ -3242,8 +3291,8 @@
 
   /* Get the loop-entry arguments.  */
   if (slp_node)
-    vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
-                       NULL, reduc_index);
+    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
+                       NULL, slp_node, reduc_index);
   else
     {
       vec_initial_defs = VEC_alloc (tree, heap, 1);
@@ -3968,7 +4017,7 @@
   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
   VEC (gimple, heap) *phis = NULL;
   int vec_num;
-  tree def0, def1, tem;
+  tree def0, def1, tem, op0, op1 = NULL_TREE;
 
   if (nested_in_vect_loop_p (loop, stmt))
     {
@@ -4047,6 +4096,9 @@
       gcc_unreachable ();
     }
 
+  if (code == COND_EXPR && slp_node)
+    return false;
+
   scalar_dest = gimple_assign_lhs (stmt);
   scalar_type = TREE_TYPE (scalar_dest);
   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
@@ -4121,7 +4173,7 @@
 
   if (code == COND_EXPR)
     {
-      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
+      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "unsupported condition in reduction");
@@ -4276,6 +4328,25 @@
       return false;
     }
 
+  /* In case of widenning multiplication by a constant, we update the type
+     of the constant to be the type of the other operand.  We check that the
+     constant fits the type in the pattern recognition pass.  */
+  if (code == DOT_PROD_EXPR
+      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
+    {
+      if (TREE_CODE (ops[0]) == INTEGER_CST)
+        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
+      else if (TREE_CODE (ops[1]) == INTEGER_CST)
+        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
+      else
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "invalid types in dot-prod");
+
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
@@ -4374,7 +4445,7 @@
           gcc_assert (!slp_node);
           vectorizable_condition (stmt, gsi, vec_stmt, 
                                   PHI_RESULT (VEC_index (gimple, phis, 0)), 
-                                  reduc_index);
+                                  reduc_index, NULL);
           /* Multiple types are not supported for condition.  */
           break;
         }
@@ -4382,8 +4453,6 @@
       /* Handle uses.  */
       if (j == 0)
         {
-          tree op0, op1 = NULL_TREE;
-
           op0 = ops[!reduc_index];
           if (op_type == ternary_op)
             {
@@ -4394,8 +4463,8 @@
             }
 
           if (slp_node)
-            vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
-                               -1);
+            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
+                               slp_node, -1);
           else
             {
               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
@@ -4413,11 +4482,19 @@
         {
           if (!slp_node)
             {
-              enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
-              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
+              enum vect_def_type dt;
+              gimple dummy_stmt;
+              tree dummy;
+
+              vect_is_simple_use (ops[!reduc_index], loop_vinfo, NULL,
+                                  &dummy_stmt, &dummy, &dt);
+              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
+                                                              loop_vec_def0);
               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
               if (op_type == ternary_op)
                 {
+                  vect_is_simple_use (op1, loop_vinfo, NULL, &dummy_stmt,
+                                      &dummy, &dt);
                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
                                                                 loop_vec_def1);
                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
@@ -4722,6 +4799,8 @@
   tree cond_expr = NULL_TREE;
   gimple_seq cond_expr_stmt_list = NULL;
   bool do_peeling_for_loop_bound;
+  gimple stmt, pattern_stmt, pattern_def_stmt;
+  bool transform_pattern_stmt = false, pattern_def = false;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vec_transform_loop ===");
@@ -4809,11 +4888,19 @@
 	    }
 	}
 
-      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
+      pattern_stmt = NULL;
+      for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
 	{
-	  gimple stmt = gsi_stmt (si);
 	  bool is_store;
 
+          if (transform_pattern_stmt)
+            {
+              stmt = pattern_stmt;
+              transform_pattern_stmt = false;
+            }
+          else
+            stmt = gsi_stmt (si);
+
 	  if (vect_print_dump_info (REPORT_DETAILS))
 	    {
 	      fprintf (vect_dump, "------>vectorizing statement: ");
@@ -4836,14 +4923,54 @@
 
 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
 	      && !STMT_VINFO_LIVE_P (stmt_info))
-	    {
-	      gsi_next (&si);
-	      continue;
+            {
+              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+                {
+                  stmt = pattern_stmt;
+                  stmt_info = vinfo_for_stmt (stmt);
+                }
+              else
+	        {
+   	          gsi_next (&si);
+	          continue;
+                }
 	    }
+          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+            transform_pattern_stmt = true;
+
+          /* If pattern statement has a def stmt, vectorize it too.  */
+          if (is_pattern_stmt_p (stmt_info)
+              && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
+              && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
+                  || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
+            {
+              if (pattern_def)
+                pattern_def = false;
+              else
+                {
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    {
+                      fprintf (vect_dump, "==> vectorizing pattern def"
+                                          " stmt: ");
+                      print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
+                                         TDF_SLIM);
+                    }
+
+                  pattern_def = true;
+                  stmt = pattern_def_stmt;
+                  stmt_info = vinfo_for_stmt (stmt);
+                }
+            }
 
 	  gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
-	  nunits =
-	    (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+	  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
+                                               STMT_VINFO_VECTYPE (stmt_info));
 	  if (!STMT_SLP_TYPE (stmt_info)
 	      && nunits != (unsigned int) vectorization_factor
               && vect_print_dump_info (REPORT_DETAILS))
@@ -4868,8 +4995,9 @@
 	      /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
 	      if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
 		{
-		  gsi_next (&si);
-		  continue;
+                  if (!transform_pattern_stmt && !pattern_def)
+ 		    gsi_next (&si);
+  		  continue;
 		}
 	    }
 
@@ -4888,7 +5016,7 @@
 		     the chain.  */
 		  vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
 		  gsi_remove (&si, true);
-		  continue;
+ 		  continue;
 		}
 	      else
 		{
@@ -4898,7 +5026,9 @@
 		  continue;
 		}
 	    }
-	  gsi_next (&si);
+
+          if (!transform_pattern_stmt && !pattern_def)
+ 	    gsi_next (&si);
 	}		        /* stmts in BB */
     }				/* BBs in loop */
 
--- a/src/gcc/tree-vect-loop-manip.c
+++ b/src/gcc/tree-vect-loop-manip.c
@@ -1105,35 +1105,6 @@
   first_niters = PHI_RESULT (newphi);
 }
 
-
-/* Remove dead assignments from loop NEW_LOOP.  */
-
-static void
-remove_dead_stmts_from_loop (struct loop *new_loop)
-{
-  basic_block *bbs = get_loop_body (new_loop);
-  unsigned i;
-  for (i = 0; i < new_loop->num_nodes; ++i)
-    {
-      gimple_stmt_iterator gsi;
-      for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
-	{
-	  gimple stmt = gsi_stmt (gsi);
-	  if (is_gimple_assign (stmt)
-	      && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
-	      && has_zero_uses (gimple_assign_lhs (stmt)))
-	    {
-	      gsi_remove (&gsi, true);
-	      release_defs (stmt);
-	    }
-	  else
-	    gsi_next (&gsi);
-	}
-    }
-  free (bbs);
-}
-
-
 /* Function slpeel_tree_peel_loop_to_edge.
 
    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
@@ -1445,13 +1416,6 @@
   BITMAP_FREE (definitions);
   delete_update_ssa ();
 
-  /* Remove all pattern statements from the loop copy.  They will confuse
-     the expander if DCE is disabled.
-     ???  The pattern recognizer should be split into an analysis and
-     a transformation phase that is then run only on the loop that is
-     going to be transformed.  */
-  remove_dead_stmts_from_loop (new_loop);
-
   adjust_vec_debug_stmts ();
 
   return new_loop;
--- a/src/gcc/tree-vectorizer.h
+++ b/src/gcc/tree-vectorizer.h
@@ -73,15 +73,15 @@
 /************************************************************************
   SLP
  ************************************************************************/
+typedef void *slp_void_p;
+DEF_VEC_P (slp_void_p);
+DEF_VEC_ALLOC_P (slp_void_p, heap);
 
-/* A computation tree of an SLP instance. Each node corresponds to a group of
+/* A computation tree of an SLP instance.  Each node corresponds to a group of
    stmts to be packed in a SIMD stmt.  */
 typedef struct _slp_tree {
-  /* Only binary and unary operations are supported. LEFT child corresponds to
-     the first operand and RIGHT child to the second if the operation is
-     binary.  */
-  struct _slp_tree *left;
-  struct _slp_tree *right;
+  /* Nodes that contain def-stmts of this node statements operands.  */
+  VEC (slp_void_p, heap) *children;
   /* A group of scalar stmts to be vectorized together.  */
   VEC (gimple, heap) *stmts;
   /* Vectorized stmt/s.  */
@@ -146,14 +146,32 @@
 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
 #define SLP_INSTANCE_FIRST_LOAD_STMT(S)          (S)->first_load
 
-#define SLP_TREE_LEFT(S)                         (S)->left
-#define SLP_TREE_RIGHT(S)                        (S)->right
+#define SLP_TREE_CHILDREN(S)                     (S)->children
 #define SLP_TREE_SCALAR_STMTS(S)                 (S)->stmts
 #define SLP_TREE_VEC_STMTS(S)                    (S)->vec_stmts
 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
 #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S)         (S)->cost.outside_of_loop
 #define SLP_TREE_INSIDE_OF_LOOP_COST(S)          (S)->cost.inside_of_loop
 
+/* This structure is used in creation of an SLP tree.  Each instance
+   corresponds to the same operand in a group of scalar stmts in an SLP
+   node.  */
+typedef struct _slp_oprnd_info
+{
+  /* Def-stmts for the operands.  */
+  VEC (gimple, heap) *def_stmts;
+  /* Information about the first statement, its vector def-type, type, the
+     operand itself in case it's constant, and an indication if it's a pattern
+     stmt.  */
+  enum vect_def_type first_dt;
+  tree first_def_type;
+  tree first_const_oprnd;
+  bool first_pattern;
+} *slp_oprnd_info;
+
+DEF_VEC_P(slp_oprnd_info);
+DEF_VEC_ALLOC_P(slp_oprnd_info, heap);
+
 
 typedef struct _vect_peel_info
 {
@@ -464,6 +482,9 @@
         pattern).  */
   gimple related_stmt;
 
+  /* Used to keep a def stmt of a pattern stmt if such exists.  */
+  gimple pattern_def_stmt;
+
   /* List of datarefs that are known to have the same alignment as the dataref
      of this stmt.  */
   VEC(dr_p,heap) *same_align_refs;
@@ -531,6 +552,7 @@
 
 #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
 #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
+#define STMT_VINFO_PATTERN_DEF_STMT(S)     (S)->pattern_def_stmt
 #define STMT_VINFO_SAME_ALIGN_REFS(S)      (S)->same_align_refs
 #define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
 #define STMT_VINFO_DR_GROUP_FIRST_DR(S)    (S)->first_dr
@@ -794,9 +816,9 @@
 extern tree vectorizable_function (gimple, tree, tree);
 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
                                     slp_tree);
-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type,
-                                   slp_tree);
-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree);
+extern void vect_model_store_cost (stmt_vec_info, int, bool,
+				   enum vect_def_type, slp_tree);
+extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree);
 extern void vect_finish_stmt_generation (gimple, gimple,
                                          gimple_stmt_iterator *);
 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
@@ -810,10 +832,13 @@
 extern void vect_remove_stores (gimple);
 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
-                                    tree, int);
+                                    tree, int, slp_tree);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
                                 unsigned int *, unsigned int *);
 extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
+extern bool vect_supportable_shift (enum tree_code, tree);
+extern void vect_get_vec_defs (tree, tree, gimple, VEC (tree, heap) **,
+			       VEC (tree, heap) **, slp_tree, int);
 
 /* In tree-vect-data-refs.c.  */
 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
@@ -829,21 +854,22 @@
 extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
 extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
 extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
-extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *,
-                                      gimple *, bool, bool *);
+extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
+				      tree *, gimple *, bool, bool *);
 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
 extern tree vect_create_destination_var (tree, tree);
-extern bool vect_strided_store_supported (tree);
-extern bool vect_strided_load_supported (tree);
-extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple,
+extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
+extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple,
                                     gimple_stmt_iterator *, VEC(tree,heap) **);
 extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
                                     enum dr_alignment_support, tree,
                                     struct loop **);
-extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple,
-                                    gimple_stmt_iterator *, VEC(tree,heap) **);
-extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int,
+extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int,
                                          gimple_stmt_iterator *);
+extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *);
 extern int vect_get_place_in_interleaving_chain (gimple, gimple);
 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
@@ -879,8 +905,9 @@
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
 extern void vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);
-extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
-                               VEC (tree,heap) **, int);
+extern void vect_get_slp_defs (VEC (tree, heap) *, slp_tree,
+			       VEC (slp_void_p, heap) **, int);
+
 extern LOC find_bb_location (basic_block);
 extern bb_vec_info vect_slp_analyze_bb (basic_block);
 extern void vect_slp_transform_bb (basic_block);
@@ -889,9 +916,9 @@
 /* Pattern recognition functions.
    Additional pattern recognition functions can (and will) be added
    in the future.  */
-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
-#define NUM_PATTERNS 4
-void vect_pattern_recog (loop_vec_info);
+typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
+#define NUM_PATTERNS 7 
+void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */
 unsigned vectorize_loops (void);
--- a/src/gcc/tree-vect-patterns.c
+++ b/src/gcc/tree-vect-patterns.c
@@ -38,33 +38,94 @@
 #include "recog.h"
 #include "diagnostic-core.h"
 
-/* Function prototypes */
-static void vect_pattern_recog_1
-  (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
-static bool widened_name_p (tree, gimple, tree *, gimple *);
-
 /* Pattern recognition functions  */
-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
-static gimple vect_recog_pow_pattern (gimple, tree *, tree *);
+static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
+					    tree *);
+static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
+					     tree *);
+static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
+					   tree *);
+static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
+static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
+                                                 tree *);
+static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
+                                       tree *, tree *);
+static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
+                                                 tree *, tree *);
 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_widen_mult_pattern,
 	vect_recog_widen_sum_pattern,
 	vect_recog_dot_prod_pattern,
-	vect_recog_pow_pattern};
+	vect_recog_pow_pattern,
+	vect_recog_widen_shift_pattern,
+        vect_recog_over_widening_pattern,
+	vect_recog_mixed_size_cond_pattern};
 
 
-/* Function widened_name_p
+/* Check whether STMT2 is in the same loop or basic block as STMT1.
+   Which of the two applies depends on whether we're currently doing
+   loop-based or basic-block-based vectorization, as determined by
+   the vinfo_for_stmt for STMT1 (which must be defined).
 
-   Check whether NAME, an ssa-name used in USE_STMT,
-   is a result of a type-promotion, such that:
-     DEF_STMT: NAME = NOP (name0)
-   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
-*/
+   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
+   to be defined as well.  */
 
 static bool
-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
+vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+
+  if (!gimple_bb (stmt2))
+    return false;
+
+  if (loop_vinfo)
+    {
+      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
+	return false;
+    }
+  else
+    {
+      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
+	  || gimple_code (stmt2) == GIMPLE_PHI)
+	return false;
+    }
+
+  gcc_assert (vinfo_for_stmt (stmt2));
+  return true;
+}
+
+/* If the LHS of DEF_STMT has a single use, and that statement is
+   in the same loop or basic block, return it.  */
+
+static gimple
+vect_single_imm_use (gimple def_stmt)
+{
+  tree lhs = gimple_assign_lhs (def_stmt);
+  use_operand_p use_p;
+  gimple use_stmt;
+
+  if (!single_imm_use (lhs, &use_p, &use_stmt))
+    return NULL;
+
+  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
+    return NULL;
+
+  return use_stmt;
+}
+
+/* Check whether NAME, an ssa-name used in USE_STMT,
+   is a result of a type promotion or demotion, such that:
+      DEF_STMT: NAME = NOP (name0)
+   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
+   If CHECK_SIGN is TRUE, check that either both types are signed or both are
+   unsigned.  */
+
+static bool
+type_conversion_p (tree name, gimple use_stmt, bool check_sign,
+                   tree *orig_type, gimple *def_stmt, bool *promotion)
 {
   tree dummy;
   gimple dummy_gimple;
@@ -74,35 +135,43 @@
   tree oprnd0;
   enum vect_def_type dt;
   tree def;
+  bb_vec_info bb_vinfo;
 
   stmt_vinfo = vinfo_for_stmt (use_stmt);
   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
 
-  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
+  if (!vect_is_simple_use (name, loop_vinfo, bb_vinfo, def_stmt, &def, &dt))
     return false;
 
   if (dt != vect_internal_def
       && dt != vect_external_def && dt != vect_constant_def)
     return false;
 
-  if (! *def_stmt)
+  if (!*def_stmt)
     return false;
 
   if (!is_gimple_assign (*def_stmt))
     return false;
 
-  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
+  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
     return false;
 
   oprnd0 = gimple_assign_rhs1 (*def_stmt);
 
-  *half_type = TREE_TYPE (oprnd0);
-  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
-      || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
-      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
+  *orig_type = TREE_TYPE (oprnd0);
+  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
+      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
+    return false;
+
+  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
+    *promotion = true;
+  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
+    *promotion = false;
+  else
     return false;
 
-  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
+  if (!vect_is_simple_use (oprnd0, loop_vinfo, bb_vinfo, &dummy_gimple, &dummy,
                            &dt))
     return false;
 
@@ -145,9 +214,9 @@
 
    Input:
 
-   * LAST_STMT: A stmt from which the pattern search begins. In the example,
-   when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
-   detected.
+   * STMTS: Contains a stmt from which the pattern search begins.  In the
+   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
+   will be detected.
 
    Output:
 
@@ -168,9 +237,10 @@
          inner-loop nested in an outer-loop that us being vectorized).  */
 
 static gimple
-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
+			     tree *type_out)
 {
-  gimple stmt;
+  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
   tree oprnd0, oprnd1;
   tree oprnd00, oprnd01;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
@@ -178,8 +248,14 @@
   gimple pattern_stmt;
   tree prod_type;
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+  struct loop *loop;
   tree var, rhs;
+  bool promotion;
+
+  if (!loop_info)
+    return NULL;
+
+  loop = LOOP_VINFO_LOOP (loop_info);
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -238,7 +314,9 @@
         return NULL;
       stmt = last_stmt;
 
-      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
+      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
+                             &promotion)
+          && promotion)
         {
           stmt = def_stmt;
           oprnd0 = gimple_assign_rhs1 (stmt);
@@ -293,10 +371,14 @@
       if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
           || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
         return NULL;
-      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
+      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
+                              &promotion)
+          || !promotion)
         return NULL;
       oprnd00 = gimple_assign_rhs1 (def_stmt);
-      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
+      if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt,
+                              &promotion)
+          || !promotion)
         return NULL;
       oprnd01 = gimple_assign_rhs1 (def_stmt);
       if (!types_compatible_p (half_type0, half_type1))
@@ -327,6 +409,88 @@
   return pattern_stmt;
 }
 
+
+/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
+   and LSHIFT_EXPR.
+
+   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
+   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
+
+   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
+   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
+   that satisfies the above restrictions,  we can perform a widening opeartion
+   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
+   with a_it = (interm_type) a_t;  */
+
+static bool
+vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
+                               tree const_oprnd, tree *oprnd,
+                               VEC (gimple, heap) **stmts, tree type,
+                               tree *half_type, gimple def_stmt)
+{
+  tree new_type, new_oprnd, tmp;
+  gimple new_stmt;
+
+  if (code != MULT_EXPR && code != LSHIFT_EXPR)
+    return false;
+
+  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
+        || (code == LSHIFT_EXPR
+            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
+                != 1))
+      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
+    {
+      /* CONST_OPRND is a constant of HALF_TYPE.  */
+      *oprnd = gimple_assign_rhs1 (def_stmt);
+      return true;
+    }
+
+  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
+    return false;
+
+  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
+    return false;
+
+  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
+     a type 2 times bigger than HALF_TYPE.  */
+  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
+                                             TYPE_UNSIGNED (type));
+  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
+      || (code == LSHIFT_EXPR
+          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
+    return false;
+
+  /* Use NEW_TYPE for widening operation.  */
+  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
+    {
+      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
+      /* Check if the already created pattern stmt is what we need.  */
+      if (!is_gimple_assign (new_stmt)
+          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
+          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
+        return false;
+
+      VEC_safe_push (gimple, heap, *stmts, def_stmt);
+      *oprnd = gimple_assign_lhs (new_stmt);
+    }
+  else
+    {
+      /* Create a_T = (NEW_TYPE) a_t;  */
+      *oprnd = gimple_assign_rhs1 (def_stmt);
+      tmp = create_tmp_var (new_type, NULL);
+      add_referenced_var (tmp);
+      new_oprnd = make_ssa_name (tmp, NULL);
+      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
+                                               NULL_TREE);
+      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
+      VEC_safe_push (gimple, heap, *stmts, def_stmt);
+      *oprnd = new_oprnd;
+    }
+
+  *half_type = new_type;
+  return true;
+}
+
 /* Function vect_recog_widen_mult_pattern
 
    Try to find the following pattern:
@@ -342,37 +506,81 @@
 
    where type 'TYPE' is at least double the size of type 'type'.
 
-   Input:
+   Also detect unsgigned cases:
 
-   * LAST_STMT: A stmt from which the pattern search begins. In the example,
-   when this function is called with S5, the pattern {S3,S4,S5} is be detected.
+     unsigned type a_t, b_t;
+     unsigned TYPE u_prod_T;
+     TYPE a_T, b_T, prod_T;
+
+     S1  a_t = ;
+     S2  b_t = ;
+     S3  a_T = (TYPE) a_t;
+     S4  b_T = (TYPE) b_t;
+     S5  prod_T = a_T * b_T;
+     S6  u_prod_T = (unsigned TYPE) prod_T;
+
+   and multiplication by constants:
+
+     type a_t;
+     TYPE a_T, prod_T;
+
+     S1  a_t = ;
+     S3  a_T = (TYPE) a_t;
+     S5  prod_T = a_T * CONST;
+
+   A special case of multiplication by constants is when 'TYPE' is 4 times
+   bigger than 'type', but CONST fits an intermediate type 2 times smaller
+   than 'TYPE'.  In that case we create an additional pattern stmt for S3
+   to create a variable of the intermediate type, and perform widen-mult
+   on the intermediate type as well:
+
+     type a_t;
+     interm_type a_it;
+     TYPE a_T, prod_T,  prod_T';
+
+     S1  a_t = ;
+     S3  a_T = (TYPE) a_t;
+           '--> a_it = (interm_type) a_t;
+     S5  prod_T = a_T * CONST;
+           '--> prod_T' = a_it w* CONST;
+
+   Input/Output:
+
+   * STMTS: Contains a stmt from which the pattern search begins.  In the
+   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
+   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
+   replaced with S6 in STMTS.  In case of multiplication by a constant
+   of an intermediate type (the last case above), STMTS also contains S3
+   (inserted before S5).
 
    Output:
 
    * TYPE_IN: The type of the input arguments to the pattern.
 
-   * TYPE_OUT: The type of the output  of this pattern.
+   * TYPE_OUT: The type of the output of this pattern.
 
    * Return value: A new stmt that will be used to replace the sequence of
-   stmts that constitute the pattern. In this case it will be:
+   stmts that constitute the pattern.  In this case it will be:
         WIDEN_MULT <a_t, b_t>
 */
 
 static gimple
-vect_recog_widen_mult_pattern (gimple last_stmt,
-			       tree *type_in,
-			       tree *type_out)
+vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
+                               tree *type_in, tree *type_out)
 {
+  gimple last_stmt = VEC_pop (gimple, *stmts);
   gimple def_stmt0, def_stmt1;
   tree oprnd0, oprnd1;
   tree type, half_type0, half_type1;
   gimple pattern_stmt;
-  tree vectype, vectype_out;
+  tree vectype, vectype_out = NULL_TREE;
   tree dummy;
   tree var;
   enum tree_code dummy_code;
   int dummy_int;
   VEC (tree, heap) *dummy_vec;
+  bool op1_ok;
+  bool promotion;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -391,15 +599,58 @@
       || !types_compatible_p (TREE_TYPE (oprnd1), type))
     return NULL;
 
-  /* Check argument 0 */
-  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
+  /* Check argument 0.  */
+  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
+                          &promotion)
+      || !promotion)
     return NULL;
-  oprnd0 = gimple_assign_rhs1 (def_stmt0);
+   /* Check argument 1.  */
+  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
+                              &def_stmt1, &promotion);
+  if (op1_ok && promotion)
+    {
+      oprnd0 = gimple_assign_rhs1 (def_stmt0);
+      oprnd1 = gimple_assign_rhs1 (def_stmt1);
+    }
+  else
+    {
+      if (TREE_CODE (oprnd1) == INTEGER_CST
+          && TREE_CODE (half_type0) == INTEGER_TYPE
+          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
+                                            &oprnd0, stmts, type,
+                                            &half_type0, def_stmt0))
+        half_type1 = half_type0;
+      else
+        return NULL;
+    }
 
-  /* Check argument 1 */
-  if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
-    return NULL;
-  oprnd1 = gimple_assign_rhs1 (def_stmt1);
+  /* Handle unsigned case.  Look for
+     S6  u_prod_T = (unsigned TYPE) prod_T;
+     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
+  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
+    {
+      gimple use_stmt;
+      tree use_lhs;
+      tree use_type;
+
+      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
+        return NULL;
+
+      use_stmt = vect_single_imm_use (last_stmt);
+      if (!use_stmt || !is_gimple_assign (use_stmt)
+	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
+        return NULL;
+
+      use_lhs = gimple_assign_lhs (use_stmt);
+      use_type = TREE_TYPE (use_lhs);
+      if (!INTEGRAL_TYPE_P (use_type)
+          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
+          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
+        return NULL;
+
+      type = use_type;
+      last_stmt = use_stmt;
+    }
 
   if (!types_compatible_p (half_type0, half_type1))
     return NULL;
@@ -431,6 +682,7 @@
   if (vect_print_dump_info (REPORT_DETAILS))
     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
 
+  VEC_safe_push (gimple, heap, *stmts, last_stmt);
   return pattern_stmt;
 }
 
@@ -462,8 +714,9 @@
 */
 
 static gimple
-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out)
 {
+  gimple last_stmt = VEC_index (gimple, *stmts, 0);
   tree fn, base, exp = NULL;
   gimple stmt;
   tree var;
@@ -574,16 +827,24 @@
 	 inner-loop nested in an outer-loop that us being vectorized).  */
 
 static gimple
-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
+                              tree *type_out)
 {
+  gimple last_stmt = VEC_index (gimple, *stmts, 0); 
   gimple stmt;
   tree oprnd0, oprnd1;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
   tree type, half_type;
   gimple pattern_stmt;
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
-  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
+  struct loop *loop;
   tree var;
+  bool promotion;
+
+  if (!loop_info)
+    return NULL;
+
+  loop = LOOP_VINFO_LOOP (loop_info);
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -612,14 +873,16 @@
       || !types_compatible_p (TREE_TYPE (oprnd1), type))
     return NULL;
 
-  /* So far so good. Since last_stmt was detected as a (summation) reduction,
+  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
      we know that oprnd1 is the reduction variable (defined by a loop-header
      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
      Left to check that oprnd0 is defined by a cast from type 'type' to type
      'TYPE'.  */
 
-  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
-    return NULL;
+  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
+                          &promotion)
+      || !promotion)
+     return NULL;
 
   oprnd0 = gimple_assign_rhs1 (stmt);
   *type_in = half_type;
@@ -641,10 +904,715 @@
      when doing outer-loop vectorization.  */
   gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
 
+  VEC_safe_push (gimple, heap, *stmts, last_stmt);
   return pattern_stmt;
 }
 
 
+/* Return TRUE if the operation in STMT can be performed on a smaller type.
+
+   Input:
+   STMT - a statement to check.
+   DEF - we support operations with two operands, one of which is constant.
+         The other operand can be defined by a demotion operation, or by a
+         previous statement in a sequence of over-promoted operations.  In the
+         later case DEF is used to replace that operand.  (It is defined by a
+         pattern statement we created for the previous statement in the
+         sequence).
+
+   Input/output:
+   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
+         NULL, it's the type of DEF.
+   STMTS - additional pattern statements.  If a pattern statement (type
+         conversion) is created in this function, its original statement is
+         added to STMTS.
+
+   Output:
+   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
+         operands to use in the new pattern statement for STMT (will be created
+         in vect_recog_over_widening_pattern ()).
+   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
+         statements for STMT: the first one is a type promotion and the second
+         one is the operation itself.  We return the type promotion statement
+         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
+         the second pattern statement.  */
+
+static bool
+vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
+                                  tree *op0, tree *op1, gimple *new_def_stmt,
+                                  VEC (gimple, heap) **stmts)
+{
+  enum tree_code code;
+  tree const_oprnd, oprnd;
+  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
+  gimple def_stmt, new_stmt;
+  bool first = false;
+  bool promotion;
+
+  *new_def_stmt = NULL;
+
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
+      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
+    return false;
+
+  oprnd = gimple_assign_rhs1 (stmt);
+  const_oprnd = gimple_assign_rhs2 (stmt);
+  type = gimple_expr_type (stmt);
+
+  if (TREE_CODE (oprnd) != SSA_NAME
+      || TREE_CODE (const_oprnd) != INTEGER_CST)
+    return false;
+
+  /* If we are in the middle of a sequence, we use DEF from a previous
+     statement.  Otherwise, OPRND has to be a result of type promotion.  */
+  if (*new_type)
+    {
+      half_type = *new_type;
+      oprnd = def;
+    }
+  else
+    {
+      first = true;
+      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
+			      &promotion)
+	  || !promotion
+	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
+        return false;
+    }
+
+  /* Can we perform the operation on a smaller type?  */
+  switch (code)
+    {
+      case BIT_IOR_EXPR:
+      case BIT_XOR_EXPR:
+      case BIT_AND_EXPR:
+        if (!int_fits_type_p (const_oprnd, half_type))
+          {
+            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
+            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+              return false;
+
+            interm_type = build_nonstandard_integer_type (
+                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
+            if (!int_fits_type_p (const_oprnd, interm_type))
+              return false;
+          }
+
+        break;
+
+      case LSHIFT_EXPR:
+        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
+        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+          return false;
+
+        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
+          (e.g., if the original value was char, the shift amount is at most 8
+           if we want to use short).  */
+        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
+          return false;
+
+        interm_type = build_nonstandard_integer_type (
+                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
+
+        if (!vect_supportable_shift (code, interm_type))
+          return false;
+
+        break;
+
+      case RSHIFT_EXPR:
+        if (vect_supportable_shift (code, half_type))
+          break;
+
+        /* Try intermediate type - HALF_TYPE is not supported.  */
+        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+          return false;
+
+        interm_type = build_nonstandard_integer_type (
+                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
+
+        if (!vect_supportable_shift (code, interm_type))
+          return false;
+
+        break;
+
+      default:
+        gcc_unreachable ();
+    }
+
+  /* There are four possible cases:
+     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
+        the first statement in the sequence)
+        a. The original, HALF_TYPE, is not enough - we replace the promotion
+           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
+        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
+           promotion.
+     2. OPRND is defined by a pattern statement we created.
+        a. Its type is not sufficient for the operation, we create a new stmt:
+           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
+           this statement in NEW_DEF_STMT, and it is later put in
+           STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
+        b. OPRND is good to use in the new statement.  */
+  if (first)
+    {
+      if (interm_type)
+        {
+          /* Replace the original type conversion HALF_TYPE->TYPE with
+             HALF_TYPE->INTERM_TYPE.  */
+          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
+            {
+              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
+              /* Check if the already created pattern stmt is what we need.  */
+              if (!is_gimple_assign (new_stmt)
+                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
+                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
+                return false;
+
+              VEC_safe_push (gimple, heap, *stmts, def_stmt);
+              oprnd = gimple_assign_lhs (new_stmt);
+            }
+          else
+            {
+              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
+              oprnd = gimple_assign_rhs1 (def_stmt);
+              tmp = create_tmp_reg (interm_type, NULL);
+              add_referenced_var (tmp);
+              new_oprnd = make_ssa_name (tmp, NULL);
+              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+                                                       oprnd, NULL_TREE);
+              SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
+              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
+              VEC_safe_push (gimple, heap, *stmts, def_stmt);
+              oprnd = new_oprnd;
+            }
+        }
+      else
+        {
+          /* Retrieve the operand before the type promotion.  */
+          oprnd = gimple_assign_rhs1 (def_stmt);
+        }
+    }
+  else
+    {
+      if (interm_type)
+        {
+          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
+          tmp = create_tmp_reg (interm_type, NULL);
+          add_referenced_var (tmp);
+          new_oprnd = make_ssa_name (tmp, NULL);
+          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+                                                   oprnd, NULL_TREE);
+          SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
+          oprnd = new_oprnd;
+          *new_def_stmt = new_stmt;
+        }
+
+      /* Otherwise, OPRND is already set.  */
+    }
+
+  if (interm_type)
+    *new_type = interm_type;
+  else
+    *new_type = half_type;
+
+  *op0 = oprnd;
+  *op1 = fold_convert (*new_type, const_oprnd);
+
+  return true;
+}
+
+
+/* Try to find a statement or a sequence of statements that can be performed
+   on a smaller type:
+
+     type x_t;
+     TYPE x_T, res0_T, res1_T;
+   loop:
+     S1  x_t = *p;
+     S2  x_T = (TYPE) x_t;
+     S3  res0_T = op (x_T, C0);
+     S4  res1_T = op (res0_T, C1);
+     S5  ... = () res1_T;  - type demotion
+
+   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
+   constants.
+   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
+   be 'type' or some intermediate type.  For now, we expect S5 to be a type
+   demotion operation.  We also check that S3 and S4 have only one use.
+.
+
+*/
+static gimple
+vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
+                                  tree *type_in, tree *type_out)
+{
+  gimple stmt = VEC_pop (gimple, *stmts);
+  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
+  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
+  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
+  bool first;
+  tree type = NULL;
+
+  first = true;
+  while (1)
+    {
+      if (!vinfo_for_stmt (stmt)
+          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
+        return NULL;
+
+      new_def_stmt = NULL;
+      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
+                                             &op0, &op1, &new_def_stmt,
+                                             stmts))
+        {
+          if (first)
+            return NULL;
+          else
+            break;
+        }
+
+      /* STMT can be performed on a smaller type.  Check its uses.  */
+      use_stmt = vect_single_imm_use (stmt);
+      if (!use_stmt || !is_gimple_assign (use_stmt))
+        return NULL;
+
+      /* Create pattern statement for STMT.  */
+      vectype = get_vectype_for_scalar_type (new_type);
+      if (!vectype)
+        return NULL;
+
+      /* We want to collect all the statements for which we create pattern
+         statetments, except for the case when the last statement in the
+         sequence doesn't have a corresponding pattern statement.  In such
+         case we associate the last pattern statement with the last statement
+         in the sequence.  Therefore, we only add the original statement to
+         the list if we know that it is not the last.  */
+      if (prev_stmt)
+        VEC_safe_push (gimple, heap, *stmts, prev_stmt);
+
+      var = vect_recog_temp_ssa_var (new_type, NULL);
+      pattern_stmt = gimple_build_assign_with_ops (
+                          gimple_assign_rhs_code (stmt), var, op0, op1);
+      SSA_NAME_DEF_STMT (var) = pattern_stmt;
+      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
+      STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "created pattern stmt: ");
+          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+        }
+
+      type = gimple_expr_type (stmt);
+      prev_stmt = stmt;
+      stmt = use_stmt;
+
+      first = false;
+    }
+
+  /* We got a sequence.  We expect it to end with a type demotion operation.
+     Otherwise, we quit (for now).  There are three possible cases: the
+     conversion is to NEW_TYPE (we don't do anything), the conversion is to
+     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
+     NEW_TYPE differs (we create a new conversion statement).  */
+  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
+    {
+      use_lhs = gimple_assign_lhs (use_stmt);
+      use_type = TREE_TYPE (use_lhs);
+      /* Support only type demotion or signedess change.  */
+      if (!INTEGRAL_TYPE_P (use_type)
+	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
+        return NULL;
+
+      /* Check that NEW_TYPE is not bigger than the conversion result.  */
+      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
+	return NULL;
+
+      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
+          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
+        {
+          /* Create NEW_TYPE->USE_TYPE conversion.  */
+          tmp = create_tmp_reg (use_type, NULL);
+          add_referenced_var (tmp);
+          new_oprnd = make_ssa_name (tmp, NULL);
+          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
+                                                       var, NULL_TREE);
+          SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt;
+          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
+
+          *type_in = get_vectype_for_scalar_type (new_type);
+          *type_out = get_vectype_for_scalar_type (use_type);
+
+          /* We created a pattern statement for the last statement in the
+             sequence, so we don't need to associate it with the pattern
+             statement created for PREV_STMT.  Therefore, we add PREV_STMT
+             to the list in order to mark it later in vect_pattern_recog_1.  */
+          if (prev_stmt)
+            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
+        }
+      else
+        {
+          if (prev_stmt)
+            STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
+               = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
+
+          *type_in = vectype;
+          *type_out = NULL_TREE;
+        }
+
+      VEC_safe_push (gimple, heap, *stmts, use_stmt);
+    }
+  else
+    /* TODO: support general case, create a conversion to the correct type.  */
+    return NULL;
+
+  /* Pattern detected.  */
+  if (vect_print_dump_info (REPORT_DETAILS))
+    {
+      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
+      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+    }
+
+  return pattern_stmt;
+}
+
+
+/* Detect widening shift pattern:
+
+   type a_t;
+   TYPE a_T, res_T;
+
+   S1 a_t = ;
+   S2 a_T = (TYPE) a_t;
+   S3 res_T = a_T << CONST;
+
+  where type 'TYPE' is at least double the size of type 'type'.
+
+  Also detect cases where the shift result is immediately converted
+  to another type 'result_type' that is no larger in size than 'TYPE'.
+  In those cases we perform a widen-shift that directly results in
+  'result_type', to avoid a possible over-widening situation:
+
+  type a_t;
+  TYPE a_T, res_T;
+  result_type res_result;
+
+  S1 a_t = ;
+  S2 a_T = (TYPE) a_t;
+  S3 res_T = a_T << CONST;
+  S4 res_result = (result_type) res_T;
+      '--> res_result' = a_t w<< CONST;
+
+  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
+  create an additional pattern stmt for S2 to create a variable of an
+  intermediate type, and perform widen-shift on the intermediate type:
+
+  type a_t;
+  interm_type a_it;
+  TYPE a_T, res_T, res_T';
+
+  S1 a_t = ;
+  S2 a_T = (TYPE) a_t;
+      '--> a_it = (interm_type) a_t;
+  S3 res_T = a_T << CONST;
+      '--> res_T' = a_it <<* CONST;
+
+  Input/Output:
+
+  * STMTS: Contains a stmt from which the pattern search begins.
+    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
+    in STMTS.  When an intermediate type is used and a pattern statement is
+    created for S2, we also put S2 here (before S3).
+
+  Output:
+
+  * TYPE_IN: The type of the input arguments to the pattern.
+
+  * TYPE_OUT: The type of the output of this pattern.
+
+  * Return value: A new stmt that will be used to replace the sequence of
+    stmts that constitute the pattern.  In this case it will be:
+    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
+
+static gimple
+vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
+                                tree *type_in, tree *type_out)
+{
+  gimple last_stmt = VEC_pop (gimple, *stmts);
+  gimple def_stmt0;
+  tree oprnd0, oprnd1;
+  tree type, half_type0;
+  gimple pattern_stmt;
+  tree vectype, vectype_out = NULL_TREE;
+  tree dummy;
+  tree var;
+  enum tree_code dummy_code;
+  int dummy_int;
+  VEC (tree, heap) * dummy_vec;
+  gimple use_stmt;
+  bool promotion;
+
+  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
+    return NULL;
+
+  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
+    return NULL;
+
+  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
+    return NULL;
+
+  /* Check operand 0: it has to be defined by a type promotion.  */
+  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
+                          &promotion)
+      || !promotion)
+     return NULL;
+
+  /* Check operand 1: has to be positive.  We check that it fits the type
+     in vect_handle_widen_op_by_const ().  */
+  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (def_stmt0);
+  type = gimple_expr_type (last_stmt);
+
+  /* Check for subsequent conversion to another type.  */
+  use_stmt = vect_single_imm_use (last_stmt);
+  if (use_stmt && is_gimple_assign (use_stmt)
+      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
+      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+    {
+      tree use_lhs = gimple_assign_lhs (use_stmt);
+      tree use_type = TREE_TYPE (use_lhs);
+
+      if (INTEGRAL_TYPE_P (use_type)
+	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
+	{
+	  last_stmt = use_stmt;
+	  type = use_type;
+	}
+    }
+
+  /* Check if this a widening operation.  */
+  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
+                                      &oprnd0, stmts,
+                                      type, &half_type0, def_stmt0))
+    return NULL;
+
+  /* Pattern detected.  */
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
+
+  /* Check target support.  */
+  vectype = get_vectype_for_scalar_type (half_type0);
+  vectype_out = get_vectype_for_scalar_type (type);
+
+  if (!vectype
+      || !vectype_out
+      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
+                                          vectype_out, vectype,
+                                          &dummy, &dummy, &dummy_code,
+                                          &dummy_code, &dummy_int,
+                                          &dummy_vec))
+    return NULL;
+
+  *type_in = vectype;
+  *type_out = vectype_out;
+
+  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
+  var = vect_recog_temp_ssa_var (type, NULL);
+  pattern_stmt =
+    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+
+  VEC_safe_push (gimple, heap, *stmts, last_stmt);
+  return pattern_stmt;
+}
+
+/* Function vect_recog_mixed_size_cond_pattern
+
+   Try to find the following pattern:
+
+     type x_t, y_t;
+     TYPE a_T, b_T, c_T;
+   loop:
+     S1  a_T = x_t CMP y_t ? b_T : c_T;
+
+   where type 'TYPE' is an integral type which has different size
+   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
+   than 'type', the constants need to fit into an integer type
+   with the same width as 'type') or results of conversion from 'type'.
+
+   Input:
+
+   * LAST_STMT: A stmt from which the pattern search begins.
+
+   Output:
+
+   * TYPE_IN: The type of the input arguments to the pattern.
+
+   * TYPE_OUT: The type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the pattern.
+	Additionally a def_stmt is added.
+
+	a_it = x_t CMP y_t ? b_it : c_it;
+	a_T = (TYPE) a_it;  */
+
+static gimple
+vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
+				    tree *type_out)
+{
+  gimple last_stmt = VEC_index (gimple, *stmts, 0);
+  tree cond_expr, then_clause, else_clause;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
+  tree type, vectype, comp_vectype, comp_type, op, tmp;
+  enum machine_mode cmpmode;
+  gimple pattern_stmt, def_stmt;
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
+  gimple def_stmt0 = NULL, def_stmt1 = NULL;
+  bool promotion;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt)
+      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
+      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
+    return NULL;
+
+  op = gimple_assign_rhs1 (last_stmt);
+  cond_expr = TREE_OPERAND (op, 0);
+  then_clause = TREE_OPERAND (op, 1);
+  else_clause = TREE_OPERAND (op, 2);
+
+  if (!COMPARISON_CLASS_P (cond_expr))
+    return NULL;
+
+  type = gimple_expr_type (last_stmt);
+  comp_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
+  comp_vectype = get_vectype_for_scalar_type (comp_type);
+  if (comp_vectype == NULL_TREE)
+    return NULL;
+
+  if (types_compatible_p (type, comp_type)
+      || !INTEGRAL_TYPE_P (comp_type)
+      || !INTEGRAL_TYPE_P (type))
+    return NULL;
+
+  if ((TREE_CODE (then_clause) != INTEGER_CST
+       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
+			      &def_stmt0, &promotion))
+      || (TREE_CODE (else_clause) != INTEGER_CST
+	  && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
+				 &def_stmt1, &promotion)))
+    return NULL;
+
+  if (orig_type0 && orig_type1
+      && (!types_compatible_p (orig_type0, orig_type1)
+	  || !types_compatible_p (orig_type0, comp_type)))
+    return NULL;
+
+  if (orig_type0)
+    then_clause = gimple_assign_rhs1 (def_stmt0);
+
+  if (orig_type1)
+    else_clause = gimple_assign_rhs1 (def_stmt1);
+
+  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
+  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  if (!expand_vec_cond_expr_p (comp_vectype, TYPE_MODE (comp_vectype)))
+    return NULL;
+
+  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)
+      && ((TREE_CODE (then_clause) == INTEGER_CST
+           && !int_fits_type_p (then_clause, comp_type))
+          || (TREE_CODE (else_clause) == INTEGER_CST
+              && !int_fits_type_p (else_clause, comp_type))))
+    return NULL;
+
+  tmp = build3 (COND_EXPR, comp_type, unshare_expr (cond_expr),
+		fold_convert (comp_type, then_clause),
+	       	fold_convert (comp_type, else_clause));
+  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (comp_type, NULL),
+				  tmp);
+
+  pattern_stmt
+    = gimple_build_assign_with_ops (NOP_EXPR,
+				    vect_recog_temp_ssa_var (type, NULL),
+				    gimple_assign_lhs (def_stmt), NULL_TREE);
+
+  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
+  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+  set_vinfo_for_stmt (def_stmt, def_stmt_info);
+  STMT_VINFO_VECTYPE (def_stmt_info) = comp_vectype;
+  *type_in = vectype;
+  *type_out = vectype;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
+
+  return pattern_stmt;
+}
+
+
+/* Mark statements that are involved in a pattern.  */
+
+static inline void
+vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
+                         tree pattern_vectype)
+{
+  stmt_vec_info pattern_stmt_info, def_stmt_info;
+  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
+  gimple def_stmt;
+
+  set_vinfo_for_stmt (pattern_stmt,
+                      new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo));
+  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
+  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
+
+  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
+  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
+	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
+  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
+  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
+  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
+  STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
+	= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
+  if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
+    {
+      def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
+      def_stmt_info = vinfo_for_stmt (def_stmt);
+      if (def_stmt_info == NULL)
+	{
+	  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+	  set_vinfo_for_stmt (def_stmt, def_stmt_info);
+	}
+      gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
+      STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
+      STMT_VINFO_DEF_TYPE (def_stmt_info)
+	= STMT_VINFO_DEF_TYPE (orig_stmt_info);
+      if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
+	STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
+    }
+}
+
 /* Function vect_pattern_recog_1
 
    Input:
@@ -669,29 +1637,33 @@
 
 static void
 vect_pattern_recog_1 (
-	gimple (* vect_recog_func) (gimple, tree *, tree *),
-	gimple_stmt_iterator si)
+	gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
+	gimple_stmt_iterator si,
+	VEC (gimple, heap) **stmts_to_replace)
 {
   gimple stmt = gsi_stmt (si), pattern_stmt;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  stmt_vec_info pattern_stmt_info;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  stmt_vec_info stmt_info;
+  loop_vec_info loop_vinfo;
   tree pattern_vectype;
   tree type_in, type_out;
   enum tree_code code;
   int i;
   gimple next;
 
-  pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
+  VEC_truncate (gimple, *stmts_to_replace, 0);
+  VEC_quick_push (gimple, *stmts_to_replace, stmt);
+  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
   if (!pattern_stmt)
     return;
 
+  stmt = VEC_last (gimple, *stmts_to_replace);
+  stmt_info = vinfo_for_stmt (stmt);
+  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ 
   if (VECTOR_MODE_P (TYPE_MODE (type_in)))
     {
       /* No need to check target support (already checked by the pattern
          recognition function).  */
-      if (type_out)
-	gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
       pattern_vectype = type_out ? type_out : type_in;
     }
   else
@@ -736,22 +1708,32 @@
     }
 
   /* Mark the stmts that are involved in the pattern. */
-  gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
-  set_vinfo_for_stmt (pattern_stmt,
-		      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
-  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
-
-  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
-  STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
-  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
-  STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
-  STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
+  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
 
   /* Patterns cannot be vectorized using SLP, because they change the order of
      computation.  */
-  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
-    if (next == stmt)
-      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); 
+  if (loop_vinfo)
+    FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
+      if (next == stmt)
+        VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
+
+  /* It is possible that additional pattern stmts are created and inserted in
+     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
+     relevant statements.  */
+  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
+	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
+       i++)
+    {
+      stmt_info = vinfo_for_stmt (stmt);
+      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "additional pattern stmt: ");
+          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+        }
+
+      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
+    }
 }
 
 
@@ -761,8 +1743,8 @@
    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
         computation idioms.
 
-   Output - for each computation idiom that is detected we insert a new stmt
-        that provides the same functionality and that can be vectorized. We
+   Output - for each computation idiom that is detected we create a new stmt
+        that provides the same functionality and that can be vectorized.  We
         also record some information in the struct_stmt_info of the relevant
         stmts, as explained below:
 
@@ -777,79 +1759,113 @@
          S5: ... = ..use(a_0)..         -       -               -
 
    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
-   represented by a single stmt. We then:
-   - create a new stmt S6 that will replace the pattern.
-   - insert the new stmt S6 before the last stmt in the pattern
+   represented by a single stmt.  We then:
+   - create a new stmt S6 equivalent to the pattern (the stmt is not
+     inserted into the code)
    - fill in the STMT_VINFO fields as follows:
 
                                   in_pattern_p  related_stmt    vec_stmt
          S1: a_i = ....                 -       -               -
          S2: a_2 = ..use(a_i)..         -       -               -
          S3: a_1 = ..use(a_2)..         -       -               -
-       > S6: a_new = ....               -       S4              -
          S4: a_0 = ..use(a_1)..         true    S6              -
+          '---> S6: a_new = ....        -       S4              -
          S5: ... = ..use(a_0)..         -       -               -
 
    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
-    to each other through the RELATED_STMT field).
+   to each other through the RELATED_STMT field).
 
    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
    remain irrelevant unless used by stmts other than S4.
 
    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
-   (because they are marked as irrelevant). It will vectorize S6, and record
-   a pointer to the new vector stmt VS6 both from S6 (as usual), and also
-   from S4. We do that so that when we get to vectorizing stmts that use the
-   def of S4 (like S5 that uses a_0), we'll know where to take the relevant
-   vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
+   (because they are marked as irrelevant).  It will vectorize S6, and record
+   a pointer to the new vector stmt VS6 from S6 (as usual).
+   S4 will be skipped, and S5 will be vectorized as usual:
 
                                   in_pattern_p  related_stmt    vec_stmt
          S1: a_i = ....                 -       -               -
          S2: a_2 = ..use(a_i)..         -       -               -
          S3: a_1 = ..use(a_2)..         -       -               -
        > VS6: va_new = ....             -       -               -
-         S6: a_new = ....               -       S4              VS6
          S4: a_0 = ..use(a_1)..         true    S6              VS6
+          '---> S6: a_new = ....        -       S4              VS6
        > VS5: ... = ..vuse(va_new)..    -       -               -
          S5: ... = ..use(a_0)..         -       -               -
 
-   DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
+   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
    elsewhere), and we'll end up with:
 
         VS6: va_new = ....
-        VS5: ... = ..vuse(va_new)..
+        VS5: ... = ..vuse(va_new)..  
 
-   If vectorization does not succeed, DCE will clean S6 away (its def is
-   not used), and we'll end up with the original sequence.
-*/
+   In case of more than one pattern statements, e.g., widen-mult with
+   intermediate type:
+
+     S1  a_t = ;
+     S2  a_T = (TYPE) a_t;
+           '--> S3: a_it = (interm_type) a_t;
+     S4  prod_T = a_T * CONST;
+           '--> S5: prod_T' = a_it w* CONST;
+   
+   there may be other users of a_T outside the pattern. In that case S2 will
+   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
+   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
+   be recorded in S3.  */
 
 void
-vect_pattern_recog (loop_vec_info loop_vinfo)
+vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-  unsigned int nbbs = loop->num_nodes;
+  struct loop *loop;
+  basic_block *bbs, bb;
+  unsigned int nbbs;
   gimple_stmt_iterator si;
   unsigned int i, j;
-  gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
+  gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
+  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
+  gimple stmt;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_pattern_recog ===");
 
-  /* Scan through the loop stmts, applying the pattern recognition
+  if (loop_vinfo)
+    {
+      loop = LOOP_VINFO_LOOP (loop_vinfo);
+      bbs = LOOP_VINFO_BBS (loop_vinfo);
+      nbbs = loop->num_nodes;
+    }
+  else
+    {
+      bb = BB_VINFO_BB (bb_vinfo);
+      nbbs = 1;
+      bbs = XNEW (basic_block);
+      bbs[0] = bb;
+    }
+
+  /* Scan through the stmts, applying the pattern recognition
      functions starting at each stmt visited:  */
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
+	  if (bb_vinfo && (stmt = gsi_stmt (si))
+	      && vinfo_for_stmt (stmt)
+	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
+	   continue;
+
           /* Scan over all generic vect_recog_xxx_pattern functions.  */
           for (j = 0; j < NUM_PATTERNS; j++)
             {
               vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
-              vect_pattern_recog_1 (vect_recog_func_ptr, si);
+	      vect_pattern_recog_1 (vect_recog_func_ptr, si,
+				    &stmts_to_replace);
             }
         }
     }
+
+  VEC_free (gimple, heap, stmts_to_replace);
+  if (bb_vinfo)
+    free (bbs);
 }
--- a/src/gcc/tree-vect-slp.c
+++ b/src/gcc/tree-vect-slp.c
@@ -67,15 +67,16 @@
 static void
 vect_free_slp_tree (slp_tree node)
 {
+  int i;
+  slp_void_p child;
+
   if (!node)
     return;
 
-  if (SLP_TREE_LEFT (node))
-    vect_free_slp_tree (SLP_TREE_LEFT (node));
-
-  if (SLP_TREE_RIGHT (node))
-    vect_free_slp_tree (SLP_TREE_RIGHT (node));
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_free_slp_tree ((slp_tree) child);
 
+  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
   VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
 
   if (SLP_TREE_VEC_STMTS (node))
@@ -96,46 +97,151 @@
 }
 
 
-/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
-   they are of a legal type and that they match the defs of the first stmt of
-   the SLP group (stored in FIRST_STMT_...).  */
+/* Create an SLP node for SCALAR_STMTS.  */
+
+static slp_tree
+vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
+{
+  slp_tree node;
+  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
+  unsigned int nops;
+
+  if (is_gimple_call (stmt))
+    nops = gimple_call_num_args (stmt);
+  else if (is_gimple_assign (stmt))
+    {
+      nops = gimple_num_ops (stmt) - 1;
+      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+	nops = 4;
+    }
+  else
+    return NULL;
+
+  node = XNEW (struct _slp_tree);
+  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
+  SLP_TREE_VEC_STMTS (node) = NULL;
+  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
+  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
+  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
+
+  return node;
+}
+
+
+/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
+   operand.  */
+static VEC (slp_oprnd_info, heap) *
+vect_create_oprnd_info (int nops, int group_size)
+{
+  int i;
+  slp_oprnd_info oprnd_info;
+  VEC (slp_oprnd_info, heap) *oprnds_info;
+
+  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
+  for (i = 0; i < nops; i++)
+    {
+      oprnd_info = XNEW (struct _slp_oprnd_info);
+      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
+      oprnd_info->first_dt = vect_uninitialized_def;
+      oprnd_info->first_def_type = NULL_TREE;
+      oprnd_info->first_const_oprnd = NULL_TREE;
+      oprnd_info->first_pattern = false;
+      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
+    }
+
+  return oprnds_info;
+}
+
+
+/* Free operands info.  */
+
+static void
+vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
+{
+  int i;
+  slp_oprnd_info oprnd_info;
+
+  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
+    {
+      VEC_free (gimple, heap, oprnd_info->def_stmts);
+      XDELETE (oprnd_info);
+    }
+
+  VEC_free (slp_oprnd_info, heap, *oprnds_info);
+}
+
+
+/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
+   they are of a valid type and that they match the defs of the first stmt of
+   the SLP group (stored in OPRNDS_INFO).  */
 
 static bool
 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                              slp_tree slp_node, gimple stmt,
-			     VEC (gimple, heap) **def_stmts0,
-			     VEC (gimple, heap) **def_stmts1,
-			     enum vect_def_type *first_stmt_dt0,
-			     enum vect_def_type *first_stmt_dt1,
-			     tree *first_stmt_def0_type,
-			     tree *first_stmt_def1_type,
-			     tree *first_stmt_const_oprnd,
-			     int ncopies_for_cost,
-                             bool *pattern0, bool *pattern1)
+			     int ncopies_for_cost, bool first,
+                             VEC (slp_oprnd_info, heap) **oprnds_info)
 {
   tree oprnd;
   unsigned int i, number_of_oprnds;
-  tree def;
+  tree def, def_op0 = NULL_TREE;
   gimple def_stmt;
-  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
-  stmt_vec_info stmt_info =
-    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
-  enum gimple_rhs_class rhs_class;
+  enum vect_def_type dt = vect_uninitialized_def;
+  enum vect_def_type dt_op0 = vect_uninitialized_def;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  tree lhs = gimple_get_lhs (stmt);
   struct loop *loop = NULL;
+  enum tree_code rhs_code;
+  bool different_types = false;
+  bool pattern = false;
+  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
+  int op_idx = 1;
+  tree compare_rhs = NULL_TREE, rhs = NULL_TREE;
+  int cond_idx = -1;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
 
-  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
-  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */
+  if (is_gimple_call (stmt))
+    number_of_oprnds = gimple_call_num_args (stmt);
+  else if (is_gimple_assign (stmt))
+    {
+      number_of_oprnds = gimple_num_ops (stmt) - 1;
+      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+	{ 	      
+          number_of_oprnds = 4;
+	  cond_idx = 0;
+	  rhs = gimple_assign_rhs1 (stmt);
+	}	  
+    }
+  else
+    return false;
 
   for (i = 0; i < number_of_oprnds; i++)
     {
-      oprnd = gimple_op (stmt, i + 1);
+      if (compare_rhs)
+	oprnd = compare_rhs;
+      else
+        oprnd = gimple_op (stmt, op_idx++);
+
+      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);
+
+      if (-1 < cond_idx && cond_idx < 4)
+	{
+  	  if (compare_rhs)
+	    compare_rhs = NULL_TREE;
+	  else	  
+	    oprnd = TREE_OPERAND (rhs, cond_idx++);
+	}
+		      
+      if (COMPARISON_CLASS_P (oprnd))
+        {
+          compare_rhs = TREE_OPERAND (oprnd, 1);
+          oprnd = TREE_OPERAND (oprnd, 0);
+	}
 
       if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
-                               &dt[i])
-	  || (!def_stmt && dt[i] != vect_constant_def))
+                               &dt)
+	  || (!def_stmt && dt != vect_constant_def))
 	{
 	  if (vect_print_dump_info (REPORT_SLP))
 	    {
@@ -149,34 +255,32 @@
       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
          from the pattern.  Check that all the stmts of the node are in the
          pattern.  */
-      if (loop && def_stmt && gimple_bb (def_stmt)
-          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+      if (def_stmt && gimple_bb (def_stmt)
+          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
+	      || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
+		  && gimple_code (def_stmt) != GIMPLE_PHI))
           && vinfo_for_stmt (def_stmt)
-          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
+          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
+	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
+	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
         {
-          if (!*first_stmt_dt0)
-            *pattern0 = true;
-          else
-            {
-              if (i == 1 && !*first_stmt_dt1)
-                *pattern1 = true;
-              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
-                {
-                  if (vect_print_dump_info (REPORT_DETAILS))
-                    {
-                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
-                                     " are in a pattern, and others are not ");
-                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
-                    }
+          pattern = true;
+          if (!first && !oprnd_info->first_pattern)
+	    {
+	      if (vect_print_dump_info (REPORT_DETAILS))
+		{
+		  fprintf (vect_dump, "Build SLP failed: some of the stmts"
+				" are in a pattern, and others are not ");
+		  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
+		}
 
-                  return false;
-                }
+	      return false;
             }
 
           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
-          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
+          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
 
-          if (*dt == vect_unknown_def_type)
+          if (dt == vect_unknown_def_type)
             {
               if (vect_print_dump_info (REPORT_DETAILS))
                 fprintf (vect_dump, "Unsupported pattern.");
@@ -200,85 +304,125 @@
             }
         }
 
-      if (!*first_stmt_dt0)
+      if (first)
 	{
-	  /* op0 of the first stmt of the group - store its info.  */
-	  *first_stmt_dt0 = dt[i];
-	  if (def)
-	    *first_stmt_def0_type = TREE_TYPE (def);
-	  else
-	    *first_stmt_const_oprnd = oprnd;
+          oprnd_info->first_dt = dt;
+          oprnd_info->first_pattern = pattern;
+          if (def)
+            {
+              oprnd_info->first_def_type = TREE_TYPE (def);
+              oprnd_info->first_const_oprnd = NULL_TREE;
+            }
+          else
+            {
+              oprnd_info->first_def_type = NULL_TREE;
+              oprnd_info->first_const_oprnd = oprnd;
+            }
 
-	  /* Analyze costs (for the first stmt of the group only).  */
-	  if (rhs_class != GIMPLE_SINGLE_RHS)
-	    /* Not memory operation (we don't call this functions for loads).  */
-	    vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
-	  else
-	    /* Store.  */
-	    vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+          if (i == 0)
+            {
+              def_op0 = def;
+              dt_op0 = dt;
+              /* Analyze costs (for the first stmt of the group only).  */
+              if (REFERENCE_CLASS_P (lhs))
+                /* Store.  */
+                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+                                       dt, slp_node);
+              else
+                /* Not memory operation (we don't call this function for
+                   loads).  */
+                vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
+                                        slp_node);
+            }
 	}
 
       else
 	{
-	  if (!*first_stmt_dt1 && i == 1)
+          /* Not first stmt of the group, check that the def-stmt/s match
+             the def-stmt/s of the first stmt.  Allow different definition
+             types for reduction chains: the first stmt must be a
+             vect_reduction_def (a phi node), and the rest
+             vect_internal_def.  */
+          if (((oprnd_info->first_dt != dt
+                && !(oprnd_info->first_dt == vect_reduction_def
+                     && dt == vect_internal_def))
+               || (oprnd_info->first_def_type != NULL_TREE
+                   && def
+                   && !types_compatible_p (oprnd_info->first_def_type,
+                                           TREE_TYPE (def))))
+               || (!def
+                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
+                                           TREE_TYPE (oprnd)))
+               || different_types)
 	    {
-	      /* op1 of the first stmt of the group - store its info.  */
-	      *first_stmt_dt1 = dt[i];
-	      if (def)
-		*first_stmt_def1_type = TREE_TYPE (def);
-	      else
+              if (number_of_oprnds != 2)
 		{
-		  /* We assume that the stmt contains only one constant
-		     operand. We fail otherwise, to be on the safe side.  */
-		  if (*first_stmt_const_oprnd)
-		    {
-		      if (vect_print_dump_info (REPORT_SLP))
-			fprintf (vect_dump, "Build SLP failed: two constant "
-				 "oprnds in stmt");
-		      return false;
-		    }
-		  *first_stmt_const_oprnd = oprnd;
-		}
-	    }
-	  else
-	    {
-	      /* Not first stmt of the group, check that the def-stmt/s match
-		 the def-stmt/s of the first stmt.  */
-	      if ((i == 0
-		   && (*first_stmt_dt0 != dt[i]
-		       || (*first_stmt_def0_type && def
-			   && !types_compatible_p (*first_stmt_def0_type,
-						   TREE_TYPE (def)))))
-		  || (i == 1
-		      && (*first_stmt_dt1 != dt[i]
-			  || (*first_stmt_def1_type && def
-			      && !types_compatible_p (*first_stmt_def1_type,
-						      TREE_TYPE (def)))))
-		  || (!def
-		      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
-					      TREE_TYPE (oprnd))))
+                  if (vect_print_dump_info (REPORT_SLP))
+                    fprintf (vect_dump, "Build SLP failed: different types ");
+
+                  return false;
+                }
+
+              /* Try to swap operands in case of binary operation.  */
+              if (i == 0)
+                different_types = true;
+              else
 		{
-		  if (vect_print_dump_info (REPORT_SLP))
-		    fprintf (vect_dump, "Build SLP failed: different types ");
+                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
+                  if (is_gimple_assign (stmt)
+                      && (rhs_code = gimple_assign_rhs_code (stmt))
+                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
+                      && commutative_tree_code (rhs_code)
+                      && oprnd0_info->first_dt == dt
+                      && oprnd_info->first_dt == dt_op0
+                      && def_op0 && def
+                      && !(oprnd0_info->first_def_type
+                           && !types_compatible_p (oprnd0_info->first_def_type,
+                                                   TREE_TYPE (def)))
+                      && !(oprnd_info->first_def_type
+                           && !types_compatible_p (oprnd_info->first_def_type,
+                                                   TREE_TYPE (def_op0))))
+                    {
+                      if (vect_print_dump_info (REPORT_SLP))
+                       {
+                         fprintf (vect_dump, "Swapping operands of ");
+                         print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                       }
 
-		  return false;
+                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+                                          gimple_assign_rhs2_ptr (stmt));
+                    }
+                  else
+                    {
+                      if (vect_print_dump_info (REPORT_SLP))
+                        fprintf (vect_dump, "Build SLP failed: different types ");
+ 
+                      return false;
+                    }
 		}
 	    }
 	}
 
       /* Check the types of the definitions.  */
-      switch (dt[i])
+      switch (dt)
 	{
 	case vect_constant_def:
 	case vect_external_def:
+        case vect_reduction_def:
 	  break;
 
 	case vect_internal_def:
-        case vect_reduction_def:
-	  if (i == 0)
-	    VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
+          if (different_types)
+            {
+              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
+              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
+              if (i == 0)
+                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
+              else
+                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
+            }
 	  else
-	    VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
+ 	    VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);
 	  break;
 
 	default:
@@ -309,17 +453,13 @@
                      int ncopies_for_cost, unsigned int *max_nunits,
                      VEC (int, heap) **load_permutation,
                      VEC (slp_tree, heap) **loads,
-                     unsigned int vectorization_factor)
+                     unsigned int vectorization_factor, bool *loads_permuted)
 {
-  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
-  VEC (gimple, heap) *def_stmts1 =  VEC_alloc (gimple, heap, group_size);
   unsigned int i;
   VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
   gimple stmt = VEC_index (gimple, stmts, 0);
-  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
-  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
   enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
-  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
+  enum tree_code first_cond_code = ERROR_MARK;
   tree lhs;
   bool stop_recursion = false, need_same_oprnds = false;
   tree vectype, scalar_type, first_op1 = NULL_TREE;
@@ -328,13 +468,28 @@
   int icode;
   enum machine_mode optab_op2_mode;
   enum machine_mode vec_mode;
-  tree first_stmt_const_oprnd = NULL_TREE;
   struct data_reference *first_dr;
-  bool pattern0 = false, pattern1 = false;
   HOST_WIDE_INT dummy;
   bool permutation = false;
   unsigned int load_place;
   gimple first_load, prev_first_load = NULL;
+  VEC (slp_oprnd_info, heap) *oprnds_info;
+  unsigned int nops;
+  slp_oprnd_info oprnd_info;
+  tree cond;
+
+  if (is_gimple_call (stmt))
+    nops = gimple_call_num_args (stmt);
+  else if (is_gimple_assign (stmt))
+    {
+      nops = gimple_num_ops (stmt) - 1;
+      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+	nops = 4;
+    }
+  else
+    return false;
+
+  oprnds_info = vect_create_oprnd_info (nops, group_size);
 
   /* For every stmt in NODE find its def stmt/s.  */
   FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
@@ -355,6 +510,7 @@
               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
             }
 
+	  vect_free_oprnd_info (&oprnds_info);
           return false;
         }
 
@@ -364,13 +520,30 @@
 	  if (vect_print_dump_info (REPORT_SLP))
 	    {
 	      fprintf (vect_dump,
-		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
+		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
 	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 	    }
 
+	  vect_free_oprnd_info (&oprnds_info);
 	  return false;
 	}
 
+       if (is_gimple_assign (stmt)
+	   && gimple_assign_rhs_code (stmt) == COND_EXPR
+           && (cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0))
+           && !COMPARISON_CLASS_P (cond))
+        {
+          if (vect_print_dump_info (REPORT_SLP))
+            {
+              fprintf (vect_dump,
+                       "Build SLP failed: condition is not comparison ");
+              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+            }
+
+          vect_free_oprnd_info (&oprnds_info);
+          return false;
+        }
+
       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
       vectype = get_vectype_for_scalar_type (scalar_type);
       if (!vectype)
@@ -380,23 +553,20 @@
               fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
               print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
             }
+
+	  vect_free_oprnd_info (&oprnds_info);
           return false;
         }
 
-      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
-      if (ncopies != 1)
+      /* In case of multiple types we need to detect the smallest type.  */
+      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
         {
-	  if (vect_print_dump_info (REPORT_SLP))
-            fprintf (vect_dump, "SLP with multiple types ");
-
-          /* FORNOW: multiple types are unsupported in BB SLP.  */
-	  if (bb_vinfo)
-	    return false;
+          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+          if (bb_vinfo)
+            vectorization_factor = *max_nunits;
         }
 
-      /* In case of multiple types we need to detect the smallest type.  */
-      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
-        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
+      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
 
       if (is_gimple_call (stmt))
 	rhs_code = CALL_EXPR;
@@ -431,6 +601,7 @@
 		    {
 		      if (vect_print_dump_info (REPORT_SLP))
 			fprintf (vect_dump, "Build SLP failed: no optab.");
+	  	      vect_free_oprnd_info (&oprnds_info);
 		      return false;
 		    }
 		  icode = (int) optab_handler (optab, vec_mode);
@@ -439,6 +610,7 @@
 		      if (vect_print_dump_info (REPORT_SLP))
 			fprintf (vect_dump, "Build SLP failed: "
 				            "op not supported by target.");
+	  	      vect_free_oprnd_info (&oprnds_info);
 		      return false;
 		    }
 		  optab_op2_mode = insn_data[icode].operand[2].mode;
@@ -449,6 +621,11 @@
 		    }
 		}
 	    }
+	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
+            {
+              need_same_oprnds = true;
+              first_op1 = gimple_assign_rhs2 (stmt);
+            }
 	}
       else
 	{
@@ -470,6 +647,7 @@
 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 		}
 
+	      vect_free_oprnd_info (&oprnds_info);
 	      return false;
 	    }
 
@@ -483,6 +661,7 @@
 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 		}
 
+	      vect_free_oprnd_info (&oprnds_info);
 	      return false;
 	    }
 	}
@@ -494,15 +673,12 @@
 	    {
 	      /* Store.  */
 	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
-						stmt, &def_stmts0, &def_stmts1,
-						&first_stmt_dt0,
-						&first_stmt_dt1,
-						&first_stmt_def0_type,
-						&first_stmt_def1_type,
-						&first_stmt_const_oprnd,
-						ncopies_for_cost,
-                                                &pattern0, &pattern1))
-		return false;
+						stmt, ncopies_for_cost,
+						(i == 0), &oprnds_info))
+		{
+	  	  vect_free_oprnd_info (&oprnds_info);
+ 		  return false;
+		}
 	    }
 	  else
 	    {
@@ -520,12 +696,15 @@
                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                     }
 
+	  	  vect_free_oprnd_info (&oprnds_info);
                   return false;
                 }
 
               /* Check that the size of interleaved loads group is not
                  greater than the SLP group size.  */
-              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+              if (loop_vinfo
+                  && DR_GROUP_SIZE (vinfo_for_stmt (stmt)) 
+			> ncopies * group_size)
                 {
                   if (vect_print_dump_info (REPORT_SLP))
                     {
@@ -535,6 +714,7 @@
                       print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                     }
 
+	  	  vect_free_oprnd_info (&oprnds_info);
                   return false;
                 }
 
@@ -555,6 +735,7 @@
                           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                         }
  
+	  	      vect_free_oprnd_info (&oprnds_info);
                       return false;
                     }
                 }
@@ -574,12 +755,13 @@
                           print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                         }
 
+	  	      vect_free_oprnd_info (&oprnds_info);
                       return false;
                     }
 
                   /* Analyze costs (for the first stmt in the group).  */
                   vect_model_load_cost (vinfo_for_stmt (stmt),
-                                        ncopies_for_cost, *node);
+                                        ncopies_for_cost, false, *node);
                 }
 
               /* Store the place of this load in the interleaving chain.  In
@@ -601,7 +783,7 @@
 	{
 	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
 	    {
-	      /* Not strided load. */
+	      /* Not strided load.  */
 	      if (vect_print_dump_info (REPORT_SLP))
 		{
 		  fprintf (vect_dump, "Build SLP failed: not strided load ");
@@ -609,12 +791,14 @@
 		}
 
 	      /* FORNOW: Not strided loads are not supported.  */
+	      vect_free_oprnd_info (&oprnds_info);
 	      return false;
 	    }
 
 	  /* Not memory operation.  */
 	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
-	      && TREE_CODE_CLASS (rhs_code) != tcc_unary)
+	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
+              && rhs_code != COND_EXPR)
 	    {
 	      if (vect_print_dump_info (REPORT_SLP))
 		{
@@ -623,19 +807,38 @@
 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
 		}
 
+	      vect_free_oprnd_info (&oprnds_info);
 	      return false;
 	    }
 
+          if (rhs_code == COND_EXPR)
+            {
+              tree cond_expr = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
+
+	      if (i == 0)
+		first_cond_code = TREE_CODE (cond_expr);
+              else if (first_cond_code != TREE_CODE (cond_expr))
+                {
+                  if (vect_print_dump_info (REPORT_SLP))
+                    {
+                      fprintf (vect_dump, "Build SLP failed: different"
+					  " operation");
+                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                    }
+
+		  vect_free_oprnd_info (&oprnds_info);
+                  return false;
+		}
+            }
+
 	  /* Find the def-stmts.  */
 	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
-					    &def_stmts0, &def_stmts1,
-					    &first_stmt_dt0, &first_stmt_dt1,
-					    &first_stmt_def0_type,
-					    &first_stmt_def1_type,
-					    &first_stmt_const_oprnd,
-					    ncopies_for_cost,
-                                            &pattern0, &pattern1))
-	    return false;
+					    ncopies_for_cost, (i == 0),
+					    &oprnds_info))
+	    {
+	      vect_free_oprnd_info (&oprnds_info);
+	      return false;
+	    }
 	}
     }
 
@@ -646,61 +849,55 @@
   /* Strided loads were reached - stop the recursion.  */
   if (stop_recursion)
     {
+      VEC_safe_push (slp_tree, heap, *loads, *node);
       if (permutation)
         {
-          VEC_safe_push (slp_tree, heap, *loads, *node);
+
+          *loads_permuted = true;
           *inside_cost 
             += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) 
                * group_size;
         }
       else
-        { 
-          /* We don't check here complex numbers chains, so we keep them in
-	     LOADS for further check in vect_supported_load_permutation_p.  */ 
+        {
+          /* We don't check here complex numbers chains, so we set
+             LOADS_PERMUTED for further check in
+             vect_supported_load_permutation_p.  */
           if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
-            VEC_safe_push (slp_tree, heap, *loads, *node);
+            *loads_permuted = true;
         }
 
+      vect_free_oprnd_info (&oprnds_info);
       return true;
     }
 
   /* Create SLP_TREE nodes for the definition node/s.  */
-  if (first_stmt_dt0 == vect_internal_def)
+  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
     {
-      slp_tree left_node = XNEW (struct _slp_tree);
-      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
-      SLP_TREE_VEC_STMTS (left_node) = NULL;
-      SLP_TREE_LEFT (left_node) = NULL;
-      SLP_TREE_RIGHT (left_node) = NULL;
-      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
-      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
-      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
-				inside_cost, outside_cost, ncopies_for_cost,
-				max_nunits, load_permutation, loads,
-				vectorization_factor))
-	return false;
+      slp_tree child;
 
-      SLP_TREE_LEFT (*node) = left_node;
-    }
+      if (oprnd_info->first_dt != vect_internal_def)
+        continue;
 
-  if (first_stmt_dt1 == vect_internal_def)
-    {
-      slp_tree right_node = XNEW (struct _slp_tree);
-      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
-      SLP_TREE_VEC_STMTS (right_node) = NULL;
-      SLP_TREE_LEFT (right_node) = NULL;
-      SLP_TREE_RIGHT (right_node) = NULL;
-      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
-      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
-      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
+      child = vect_create_new_slp_node (oprnd_info->def_stmts);
+      if (!child
+          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
 				inside_cost, outside_cost, ncopies_for_cost,
 				max_nunits, load_permutation, loads,
-				vectorization_factor))
-	return false;
+				vectorization_factor, loads_permuted))
+        {
+	  if (child)
+	    oprnd_info->def_stmts = NULL;
+	  vect_free_slp_tree (child);
+	  vect_free_oprnd_info (&oprnds_info);
+   	  return false;
+	}
 
-      SLP_TREE_RIGHT (*node) = right_node;
+      oprnd_info->def_stmts = NULL;
+      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
     }
 
+  vect_free_oprnd_info (&oprnds_info);
   return true;
 }
 
@@ -710,6 +907,7 @@
 {
   int i;
   gimple stmt;
+  slp_void_p child;
 
   if (!node)
     return;
@@ -722,8 +920,8 @@
     }
   fprintf (vect_dump, "\n");
 
-  vect_print_slp_tree (SLP_TREE_LEFT (node));
-  vect_print_slp_tree (SLP_TREE_RIGHT (node));
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_print_slp_tree ((slp_tree) child);
 }
 
 
@@ -737,6 +935,7 @@
 {
   int i;
   gimple stmt;
+  slp_void_p child;
 
   if (!node)
     return;
@@ -745,8 +944,8 @@
     if (j < 0 || i == j)
       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
 
-  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
-  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_mark_slp_stmts ((slp_tree) child, mark, j);
 }
 
 
@@ -758,6 +957,7 @@
   int i;
   gimple stmt;
   stmt_vec_info stmt_info;
+  slp_void_p child;
 
   if (!node)
     return;
@@ -770,8 +970,8 @@
       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
     }
 
-  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
-  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_mark_slp_stmts_relevant ((slp_tree) child);
 }
 
 
@@ -844,12 +1044,13 @@
   gimple stmt;
   VEC (gimple, heap) *tmp_stmts;
   unsigned int index, i;
+  slp_void_p child;
 
   if (!node)
     return;
 
-  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
-  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
 
   gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
   tmp_stmts = VEC_alloc (gimple, heap, group_size);
@@ -881,8 +1082,10 @@
   bool supported, bad_permutation = false;
   sbitmap load_index;
   slp_tree node, other_complex_node;
-  gimple stmt, first = NULL, other_node_first;
+  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
   unsigned complex_numbers = 0;
+  struct data_reference *dr;
+  bb_vec_info bb_vinfo;
 
   /* FORNOW: permutations are only supported in SLP.  */
   if (!slp_instn)
@@ -1042,6 +1245,76 @@
         }
     }
 
+  /* In basic block vectorization we allow any subchain of an interleaving
+     chain.
+     FORNOW: not supported in loop SLP because of realignment compications.  */
+  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
+  bad_permutation = false;
+  /* Check that for every node in the instance teh loads form a subchain.  */
+  if (bb_vinfo)
+    {
+      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+        {
+          next_load = NULL;
+          first_load = NULL;
+          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
+            {
+              if (!first_load)
+                first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (load));
+              else if (first_load
+                         != DR_GROUP_FIRST_DR (vinfo_for_stmt (load)))
+                {
+                  bad_permutation = true;
+	          break;
+	        }
+
+              if (j != 0 && next_load != load)
+                {
+                  bad_permutation = true;
+                  break;
+                }
+
+              next_load = DR_GROUP_NEXT_DR (vinfo_for_stmt (load));
+            }
+
+          if (bad_permutation)
+            break;
+        }
+
+      /* Check that the alignment of the first load in every subchain, i.e.,
+         the first statement in every load node, is supported.  */
+      if (!bad_permutation)
+        {
+          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+            {
+              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+              if (first_load 
+		      != DR_GROUP_FIRST_DR (vinfo_for_stmt (first_load)))
+                {
+                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
+                  if (vect_supportable_dr_alignment (dr, false)
+ 	               == dr_unaligned_unsupported)
+                    {
+   		      if (vect_print_dump_info (REPORT_SLP))
+		        {
+  	                  fprintf (vect_dump, "unsupported unaligned load ");
+                          print_gimple_stmt (vect_dump, first_load, 0,
+					     TDF_SLIM);
+                        }
+  		      bad_permutation = true;
+                      break;
+                    }
+	        }
+            }
+
+          if (!bad_permutation)
+            {
+              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
+              return true;
+    	    }
+        }
+    }
+
   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
      well (unless it's reduction).  */
@@ -1140,7 +1413,7 @@
                            gimple stmt)
 {
   slp_instance new_instance;
-  slp_tree node = XNEW (struct _slp_tree);
+  slp_tree node;
   unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
   unsigned int unrolling_factor = 1, nunits;
   tree vectype, scalar_type = NULL_TREE;
@@ -1151,6 +1424,8 @@
   VEC (int, heap) *load_permutation;
   VEC (slp_tree, heap) *loads;
   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+  bool loads_permuted = false;
+  VEC (gimple, heap) *scalar_stmts;
 
   if (dr)
     {
@@ -1180,7 +1455,6 @@
   if (loop_vinfo)
     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   else
-    /* No multitypes in BB SLP.  */
     vectorization_factor = nunits;
 
   /* Calculate the unrolling factor.  */
@@ -1195,39 +1469,31 @@
     }
 
   /* Create a node (a root of the SLP tree) for the packed strided stores.  */
-  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
+  scalar_stmts = VEC_alloc (gimple, heap, group_size);
   next = stmt;
   if (dr)
     {
       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
       while (next)
         {
-          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
+          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
+              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
+            VEC_safe_push (gimple, heap, scalar_stmts,
+                       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
+          else
+            VEC_safe_push (gimple, heap, scalar_stmts, next);
           next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
         }
     }
   else
     {
       /* Collect reduction statements.  */
-      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, 
-                               next); 
-           i++)
-        {
-          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
-          if (vect_print_dump_info (REPORT_DETAILS))
-            {
-              fprintf (vect_dump, "pushing reduction into node: ");
-              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
-            }
-        }
+      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
+      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
+	VEC_safe_push (gimple, heap, scalar_stmts, next);
     }
 
-  SLP_TREE_VEC_STMTS (node) = NULL;
-  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
-  SLP_TREE_LEFT (node) = NULL;
-  SLP_TREE_RIGHT (node) = NULL;
-  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
-  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
+  node = vect_create_new_slp_node (scalar_stmts);
 
   /* Calculate the number of vector stmts to create based on the unrolling
      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
@@ -1241,25 +1507,33 @@
   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                            &inside_cost, &outside_cost, ncopies_for_cost,
 			   &max_nunits, &load_permutation, &loads,
-			   vectorization_factor))
+			   vectorization_factor, &loads_permuted))
     {
-      /* Create a new SLP instance.  */
-      new_instance = XNEW (struct _slp_instance);
-      SLP_INSTANCE_TREE (new_instance) = node;
-      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
-      /* Calculate the unrolling factor based on the smallest type in the
-         loop.  */
+      /* Calculate the unrolling factor based on the smallest type.  */
       if (max_nunits > nunits)
         unrolling_factor = least_common_multiple (max_nunits, group_size)
                            / group_size;
 
+      if (unrolling_factor != 1 && !loop_vinfo)
+        {
+          if (vect_print_dump_info (REPORT_SLP))
+            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
+                               " block SLP");
+          return false;
+        }
+
+      /* Create a new SLP instance.  */
+      new_instance = XNEW (struct _slp_instance);
+      SLP_INSTANCE_TREE (new_instance) = node;
+      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
       SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
       SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
       SLP_INSTANCE_LOADS (new_instance) = loads;
       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
-      if (VEC_length (slp_tree, loads))
+
+      if (loads_permuted)
         {
           if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                   load_permutation))
@@ -1396,6 +1670,7 @@
   imm_use_iterator imm_iter;
   gimple use_stmt;
   stmt_vec_info stmt_vinfo; 
+  slp_void_p child;
 
   if (!node)
     return;
@@ -1413,8 +1688,8 @@
                      == vect_reduction_def))
 	  vect_mark_slp_stmts (node, hybrid, i);
 
-  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
-  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_detect_hybrid_slp_stmts ((slp_tree) child);
 }
 
 
@@ -1504,13 +1779,14 @@
   bool dummy;
   int i;
   gimple stmt;
+  slp_void_p child;
 
   if (!node)
     return true;
 
-  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
-      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
-    return false;
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
+      return false;
 
   FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
     {
@@ -1661,42 +1937,18 @@
 
 /* Check if the basic block can be vectorized.  */
 
-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
 {
   bb_vec_info bb_vinfo;
   VEC (ddr_p, heap) *ddrs;
   VEC (slp_instance, heap) *slp_instances;
   slp_instance instance;
-  int i, insns = 0;
-  gimple_stmt_iterator gsi;
+  int i;
   int min_vf = 2;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   bool data_dependence_in_bb = false;
 
-  current_vector_size = 0;
-
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      gimple stmt = gsi_stmt (gsi);
-      if (!is_gimple_debug (stmt)
-	  && !gimple_nop_p (stmt)
-	  && gimple_code (stmt) != GIMPLE_LABEL)
-	insns++;
-    }
-
-  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
-    {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
-        fprintf (vect_dump, "not vectorized: too many instructions in basic "
-                            "block.\n");
-
-      return NULL;
-    }
-
   bb_vinfo = new_bb_vec_info (bb);
   if (!bb_vinfo)
     return NULL;
@@ -1722,6 +1974,8 @@
       return NULL;
     }
 
+   vect_pattern_recog (NULL, bb_vinfo);
+
    if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, 
                                            &data_dependence_in_bb)
        || min_vf > max_vf
@@ -1756,16 +2010,6 @@
       return NULL;
     }
 
-   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
-    {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
-        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
-                            "block.\n");
-
-      destroy_bb_vec_info (bb_vinfo);
-      return NULL;
-    }
-
   /* Check the SLP opportunities in the basic block, analyze and build SLP
      trees.  */
   if (!vect_analyze_slp (NULL, bb_vinfo))
@@ -1788,6 +2032,16 @@
       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
     }
 
+   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
+                            "block.\n");
+
+      destroy_bb_vec_info (bb_vinfo);
+      return NULL;
+    }
+
   if (!vect_slp_analyze_operations (bb_vinfo))
     {
       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
@@ -1816,6 +2070,61 @@
 }
 
 
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+  bb_vec_info bb_vinfo;
+  int insns = 0;
+  gimple_stmt_iterator gsi;
+  unsigned int vector_sizes;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      if (!is_gimple_debug (stmt)
+          && !gimple_nop_p (stmt)
+          && gimple_code (stmt) != GIMPLE_LABEL)
+        insns++;
+    }
+
+  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: too many instructions in basic "
+                            "block.\n");
+
+      return NULL;
+    }
+
+  /* Autodetect first vector size we try.  */
+  current_vector_size = 0;
+  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+  while (1)
+    {
+      bb_vinfo = vect_slp_analyze_bb_1 (bb);
+      if (bb_vinfo)
+        return bb_vinfo;
+
+      destroy_bb_vec_info (bb_vinfo);
+
+      vector_sizes &= ~current_vector_size;
+      if (vector_sizes == 0
+          || current_vector_size == 0)
+        return NULL;
+
+      /* Try the next biggest vector size.  */
+      current_vector_size = 1 << floor_log2 (vector_sizes);
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "***** Re-trying analysis with "
+                 "vector size %d\n", current_vector_size);
+    }
+}
+
+
 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
    the number of created vector stmts depends on the unrolling factor).
    However, the actual number of vector stmts for every SLP node depends on
@@ -1939,15 +2248,15 @@
 
      For example, we have two scalar operands, s1 and s2 (e.g., group of
      strided accesses of size two), while NUNITS is four (i.e., four scalars
-     of this type can be packed in a vector). The output vector will contain
-     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
+     of this type can be packed in a vector).  The output vector will contain
+     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
      will be 2).
 
      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
      containing the operands.
 
      For example, NUNITS is four as before, and the group size is 8
-     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
+     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
@@ -1959,8 +2268,18 @@
         {
           if (is_store)
             op = gimple_assign_rhs1 (stmt);
-          else
+          else if (gimple_assign_rhs_code (stmt) != COND_EXPR)
             op = gimple_op (stmt, op_num + 1);
+	  else
+	    {
+	      if (op_num == 0 || op_num == 1)
+		{
+		  tree cond = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
+		  op = TREE_OPERAND (cond, op_num);
+		}
+	      else
+		op = TREE_OPERAND (gimple_assign_rhs1 (stmt), op_num - 1);
+	    }
 
           if (reduc_index != -1)
             {
@@ -2055,88 +2374,102 @@
    If the scalar definitions are loop invariants or constants, collect them and
    call vect_get_constant_vectors() to create vector stmts.
    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
-   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
-   vect_get_slp_vect_defs() to retrieve them.
-   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
-   the right node. This is used when the second operand must remain scalar.  */
+   must be stored in the corresponding child of SLP_NODE, and we call
+   vect_get_slp_vect_defs () to retrieve them.  */
 
 void
-vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
-                   VEC (tree,heap) **vec_oprnds0,
-                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
-{
-  gimple first_stmt;
-  enum tree_code code;
-  int number_of_vects;
+vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
+                   VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
+{
+  gimple first_stmt, first_def;
+  int number_of_vects = 0, i;
+  unsigned int child_index = 0;
   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
+  slp_tree child = NULL;
+  VEC (tree, heap) *vec_defs;
+  tree oprnd, def_lhs;
+  bool vectorized_defs;
 
   first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
-  /* The number of vector defs is determined by the number of vector statements
-     in the node from which we get those statements.  */
-  if (SLP_TREE_LEFT (slp_node))
-    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
-  else
-    {
-      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-      /* Number of vector stmts was calculated according to LHS in
-         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
-         necessary.  See vect_get_smallest_scalar_type () for details.  */
-      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
-                                     &rhs_size_unit);
-      if (rhs_size_unit != lhs_size_unit)
-        {
-          number_of_vects *= rhs_size_unit;
-          number_of_vects /= lhs_size_unit;
-        }
-    }
+  FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
+   {
+     /* For each operand we check if it has vectorized definitions in a child
+        node or we need to create them (for invariants and constants).  We
+        check if the LHS of the first stmt of the next child matches OPRND.
+        If it does, we found the correct child.  Otherwise, we call
+        vect_get_constant_vectors (), and not advance CHILD_INDEX in order
+        to check this child node for the next operand.  */
+      vectorized_defs = false;
+      if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
+        {
+          child = (slp_tree) VEC_index (slp_void_p,
+                                        SLP_TREE_CHILDREN (slp_node),
+                                        child_index);
+          first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
+
+          /* In the end of a pattern sequence we have a use of the original stmt,
+             so we need to compare OPRND with the original def.  */
+          if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
+              && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
+              && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
+            first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
 
-  /* Allocate memory for vectorized defs.  */
-  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
-
-  /* SLP_NODE corresponds either to a group of stores or to a group of
-     unary/binary operations.  We don't call this function for loads.
-     For reduction defs we call vect_get_constant_vectors(), since we are
-     looking for initial loop invariant values.  */
-  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
-    /* The defs are already vectorized.  */
-    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
-  else
-    /* Build vectors from scalar defs.  */
-    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
-                               reduc_index);
-
-  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
-    /* Since we don't call this function with loads, this is a group of
-       stores.  */
-    return;
-
-  /* For reductions, we only need initial values.  */
-  if (reduc_index != -1)
-    return;
+          if (is_gimple_call (first_def))
+            def_lhs = gimple_call_lhs (first_def);
+          else
+            def_lhs = gimple_assign_lhs (first_def);
 
-  code = gimple_assign_rhs_code (first_stmt);
-  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
-    return;
+          if (operand_equal_p (oprnd, def_lhs, 0))
+            {
+              /* The number of vector defs is determined by the number of
+                 vector statements in the node from which we get those
+                 statements.  */
+               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
+               vectorized_defs = true;
+               child_index++;
+            }
+        }
 
-  /* The number of vector defs is determined by the number of vector statements
-     in the node from which we get those statements.  */
-  if (SLP_TREE_RIGHT (slp_node))
-    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
-  else
-    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+      if (!vectorized_defs)
+        {
+          if (i == 0)
+            {
+              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+              /* Number of vector stmts was calculated according to LHS in
+                 vect_schedule_slp_instance (), fix it by replacing LHS with
+                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
+                 details.  */
+              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
+                                             &rhs_size_unit);
+              if (rhs_size_unit != lhs_size_unit)
+                {
+                  number_of_vects *= rhs_size_unit;
+                  number_of_vects /= lhs_size_unit;
+                }
+            }
+        }
 
-  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
+      /* Allocate memory for vectorized defs.  */
+      vec_defs = VEC_alloc (tree, heap, number_of_vects);
 
-  if (SLP_TREE_RIGHT (slp_node))
-    /* The defs are already vectorized.  */
-    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
-  else
-    /* Build vectors from scalar defs.  */
-    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
-                               -1);
+      /* For reduction defs we call vect_get_constant_vectors (), since we are
+         looking for initial loop invariant values.  */
+      if (vectorized_defs && reduc_index == -1)
+        /* The defs are already vectorized.  */
+        vect_get_slp_vect_defs (child, &vec_defs);
+      else
+        /* Build vectors from scalar defs.  */
+        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
+                                   number_of_vects, reduc_index);
+
+      VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
+
+      /* For reductions, we only need initial values.  */
+      if (reduc_index != -1)
+        return;
+    }
 }
 
-
 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
    building a vector of type MASK_TYPE from it) and two input vectors placed in
    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
@@ -2453,14 +2786,14 @@
   tree vectype;
   int i;
   slp_tree loads_node;
+  slp_void_p child;
 
   if (!node)
     return false;
 
-  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
-                              vectorization_factor);
-  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
-                              vectorization_factor);
+  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
+    vect_schedule_slp_instance ((slp_tree) child, instance,
+                                vectorization_factor);
 
   stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
   stmt_info = vinfo_for_stmt (stmt);
@@ -2507,8 +2840,11 @@
   /* Loads should be inserted before the first load.  */
   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
       && STMT_VINFO_STRIDED_ACCESS (stmt_info)
-      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
+      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
+      && SLP_INSTANCE_LOAD_PERMUTATION (instance))
     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
+  else if (is_pattern_stmt_p (stmt_info))
+    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
   else
     si = gsi_for_stmt (stmt);
 
--- a/src/gcc/tree-vect-stmts.c
+++ b/src/gcc/tree-vect-stmts.c
@@ -42,6 +42,82 @@
 #include "langhooks.h"
 
 
+/* Return a variable of type ELEM_TYPE[NELEMS].  */
+
+static tree
+create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
+{
+  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
+			 "vect_array");
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Return an SSA_NAME for the vector in index N.  The reference
+   is part of the vectorization of STMT and the vector is associated
+   with scalar destination SCALAR_DEST.  */
+
+static tree
+read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
+		   tree array, unsigned HOST_WIDE_INT n)
+{
+  tree vect_type, vect, vect_name, array_ref;
+  gimple new_stmt;
+
+  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
+  vect_type = TREE_TYPE (TREE_TYPE (array));
+  vect = vect_create_destination_var (scalar_dest, vect_type);
+  array_ref = build4 (ARRAY_REF, vect_type, array,
+		      build_int_cst (size_type_node, n),
+		      NULL_TREE, NULL_TREE);
+
+  new_stmt = gimple_build_assign (vect, array_ref);
+  vect_name = make_ssa_name (vect, new_stmt);
+  gimple_assign_set_lhs (new_stmt, vect_name);
+  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+  mark_symbols_for_renaming (new_stmt);
+
+  return vect_name;
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Emit code to store SSA_NAME VECT in index N of the array.
+   The store is part of the vectorization of STMT.  */
+
+static void
+write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
+		    tree array, unsigned HOST_WIDE_INT n)
+{
+  tree array_ref;
+  gimple new_stmt;
+
+  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
+		      build_int_cst (size_type_node, n),
+		      NULL_TREE, NULL_TREE);
+
+  new_stmt = gimple_build_assign (array_ref, vect);
+  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+  mark_symbols_for_renaming (new_stmt);
+}
+
+/* PTR is a pointer to an array of type TYPE.  Return a representation
+   of *PTR.  The memory reference replaces those in FIRST_DR
+   (and its group).  */
+
+static tree
+create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
+{
+  struct ptr_info_def *pi;
+  tree mem_ref, alias_ptr_type;
+
+  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
+  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
+  /* Arrays have the same alignment as their type.  */
+  pi = get_ptr_info (ptr);
+  pi->align = TYPE_ALIGN_UNIT (type);
+  pi->misalign = 0;
+  return mem_ref;
+}
+
 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
 
 /* Function vect_mark_relevant.
@@ -50,33 +126,72 @@
 
 static void
 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
-		    enum vect_relevant relevant, bool live_p)
+		    enum vect_relevant relevant, bool live_p,
+		    bool used_in_pattern)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
+  gimple pattern_stmt;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
 
+  /* If this stmt is an original stmt in a pattern, we might need to mark its
+     related pattern stmt instead of the original stmt.  However, such stmts 
+     may have their own uses that are not in any pattern, in such cases the
+     stmt itself should be marked.  */
   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
     {
-      gimple pattern_stmt;
+      bool found = false;
+      if (!used_in_pattern)
+        {
+          imm_use_iterator imm_iter;
+          use_operand_p use_p;
+          gimple use_stmt;
+          tree lhs;
+
+          if (is_gimple_assign (stmt))
+            lhs = gimple_assign_lhs (stmt);
+          else
+            lhs = gimple_call_lhs (stmt);
+
+          /* This use is out of pattern use, if LHS has other uses that are
+             pattern uses, we should mark the stmt itself, and not the pattern
+             stmt.  */
+          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+            {
+              if (is_gimple_debug (USE_STMT (use_p)))
+                continue;
+              use_stmt = USE_STMT (use_p);
+
+              if (vinfo_for_stmt (use_stmt)
+                  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
+                {
+                  found = true;
+                  break;
+                }
+            }
+        }
 
-      /* This is the last stmt in a sequence that was detected as a
-         pattern that can potentially be vectorized.  Don't mark the stmt
-         as relevant/live because it's not going to be vectorized.
-         Instead mark the pattern-stmt that replaces it.  */
-
-      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
-      stmt_info = vinfo_for_stmt (pattern_stmt);
-      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
-      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
-      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
-      stmt = pattern_stmt;
+      if (!found)
+        {
+          /* This is the last stmt in a sequence that was detected as a
+             pattern that can potentially be vectorized.  Don't mark the stmt
+             as relevant/live because it's not going to be vectorized.
+             Instead mark the pattern-stmt that replaces it.  */
+
+          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "last stmt in pattern. don't mark"
+                                " relevant/live.");
+          stmt_info = vinfo_for_stmt (pattern_stmt);
+          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
+          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
+          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
+          stmt = pattern_stmt;
+        }
     }
 
   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
@@ -361,7 +476,8 @@
         }
     }
 
-  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
+  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
+                      is_pattern_stmt_p (stmt_vinfo));
   return true;
 }
 
@@ -418,7 +534,7 @@
 	    }
 
 	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
-	    vect_mark_relevant (&worklist, phi, relevant, live_p);
+	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
 	}
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 	{
@@ -430,7 +546,7 @@
 	    }
 
 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
-            vect_mark_relevant (&worklist, stmt, relevant, live_p);
+            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
 	}
     }
 
@@ -529,15 +645,109 @@
             break;
         }
 
-      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
-	{
-	  tree op = USE_FROM_PTR (use_p);
-	  if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
-	    {
-	      VEC_free (gimple, heap, worklist);
-	      return false;
-	    }
-	}
+      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
+        {
+          /* Pattern statements are not inserted into the code, so
+             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
+             have to scan the RHS or function arguments instead.  */
+          if (is_gimple_assign (stmt))
+            {
+              tree rhs = gimple_assign_rhs1 (stmt);
+              unsigned int op_num;
+              tree op;
+	      enum tree_code rhs_code;
+              switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
+                {
+                  case GIMPLE_SINGLE_RHS:
+                    op = gimple_assign_rhs1 (stmt);
+                    rhs_code = gimple_assign_rhs_code (stmt);
+		    i = 0;
+		    if (rhs_code == COND_EXPR
+			&& COMPARISON_CLASS_P (TREE_OPERAND (op, 0)))
+		      {
+			op = TREE_OPERAND (op, 0);
+			if (!process_use (stmt, TREE_OPERAND (op, 0),
+				   	  loop_vinfo,
+					  live_p, relevant, &worklist)
+			    || !process_use (stmt, TREE_OPERAND (op, 1),
+				   	 loop_vinfo,
+				         live_p, relevant, &worklist))
+		    	  {
+			    VEC_free (gimple, heap, worklist);
+			    return false;
+			  }
+		  	i = 1;
+		      }
+                    op_num = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
+                    for (i; i < op_num; i++)
+                      {
+                        op = TREE_OPERAND (rhs, i);
+                        if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                                          &worklist))
+                          {
+                            VEC_free (gimple, heap, worklist);
+                            return false;
+                          }
+                      }
+                    break;
+                   
+                  case GIMPLE_BINARY_RHS:
+                    op = gimple_assign_rhs1 (stmt);
+                    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                                      &worklist))
+                      {
+                        VEC_free (gimple, heap, worklist);
+                        return false;
+                      }
+                    op = gimple_assign_rhs2 (stmt);
+                    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                                      &worklist))
+                      {
+                        VEC_free (gimple, heap, worklist);
+                        return false;
+                      }
+                    break;
+
+                  case GIMPLE_UNARY_RHS:
+                    op = gimple_assign_rhs1 (stmt);
+                    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                                      &worklist))
+                      {
+                        VEC_free (gimple, heap, worklist);
+                        return false;
+                      }
+
+                    break;
+                  
+                  default: 
+                    return false;
+                }
+            }
+          else if (is_gimple_call (stmt))
+            {
+              for (i = 0; i < gimple_call_num_args (stmt); i++)
+                {
+                  tree arg = gimple_call_arg (stmt, i);
+                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
+                                    &worklist))
+                    {
+                      VEC_free (gimple, heap, worklist);
+                      return false;
+                    }
+                }
+            }
+        }
+      else
+        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
+          {
+            tree op = USE_FROM_PTR (use_p);
+            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
+                              &worklist))
+              {
+                VEC_free (gimple, heap, worklist);
+                return false;
+              }
+          }
     } /* while worklist */
 
   VEC_free (gimple, heap, worklist);
@@ -688,7 +898,8 @@
 
 void
 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt, slp_tree slp_node)
+		       bool store_lanes_p, enum vect_def_type dt,
+		       slp_tree slp_node)
 {
   int group_size;
   unsigned int inside_cost = 0, outside_cost = 0;
@@ -725,9 +936,11 @@
       first_dr = STMT_VINFO_DATA_REF (stmt_info);
     }
 
-  /* Is this an access in a group of stores, which provide strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single store-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
+     access is instead being provided by a permute-and-store operation,
+     include the cost of the permutes.  */
+  if (!store_lanes_p && group_size > 1)
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
@@ -789,6 +1002,16 @@
         break;
       }
 
+    case dr_unaligned_unsupported:
+      {
+        *inside_cost = VECT_MAX_COST;
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
+
+        break;
+      }
+
     default:
       gcc_unreachable ();
     }
@@ -803,8 +1026,8 @@
    access scheme chosen.  */
 
 void
-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
-
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
+		      slp_tree slp_node)
 {
   int group_size;
   gimple first_stmt;
@@ -829,9 +1052,11 @@
       first_dr = dr;
     }
 
-  /* Is this an access in a group of loads providing strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single load-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
+     access is instead being provided by a load-and-permute operation,
+     include the cost of the permutes.  */
+  if (!load_lanes_p && group_size > 1)
     {
       /* Uses an even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
@@ -938,6 +1163,16 @@
         break;
       }
 
+    case dr_unaligned_unsupported:
+      {
+        *inside_cost = VECT_MAX_COST;
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
+
+        break;
+      }
+
     default:
       gcc_unreachable ();
     }
@@ -1116,7 +1351,14 @@
 
         /* Get the def from the vectorized stmt.  */
         def_stmt_info = vinfo_for_stmt (def_stmt);
+
         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
+        /* Get vectorized pattern statement.  */
+        if (!vec_stmt
+            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
+            && !STMT_VINFO_RELEVANT (def_stmt_info))
+          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
+                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
         gcc_assert (vec_stmt);
 	if (gimple_code (vec_stmt) == GIMPLE_PHI)
 	  vec_oprnd = PHI_RESULT (vec_stmt);
@@ -1265,16 +1507,35 @@
 }
 
 
-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
-   NULL.  */
+/* Get vectorized definitions for OP0 and OP1.
+   REDUC_INDEX is the index of reduction operand in case of reduction,
+   and -1 otherwise.  */
 
-static void
+void
 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
-		   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
-		   slp_tree slp_node)
+		   VEC (tree, heap) **vec_oprnds0,
+		   VEC (tree, heap) **vec_oprnds1,
+		   slp_tree slp_node, int reduc_index)
 {
   if (slp_node)
-    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
+    {
+      int nops = (op1 == NULL_TREE) ? 1 : 2;
+      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
+      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
+
+      VEC_quick_push (tree, ops, op0);
+      if (op1)
+        VEC_quick_push (tree, ops, op1);
+
+      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
+
+      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
+      if (op1)
+        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
+
+      VEC_free (tree, heap, ops);
+      VEC_free (slp_void_p, heap, vec_defs);
+    }
   else
     {
       tree vec_oprnd;
@@ -1372,6 +1633,7 @@
   VEC(tree, heap) *vargs = NULL;
   enum { NARROW, NONE, WIDEN } modifier;
   size_t i, nargs;
+  tree lhs;
 
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
@@ -1509,7 +1771,7 @@
   /** Transform.  **/
 
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "transform operation.");
+    fprintf (vect_dump, "transform call.");
 
   /* Handle def.  */
   scalar_dest = gimple_call_lhs (stmt);
@@ -1628,8 +1890,11 @@
      rhs of the statement with something harmless.  */
 
   type = TREE_TYPE (scalar_dest);
-  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
-				  build_zero_cst (type));
+  if (is_pattern_stmt_p (stmt_info))
+    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
+  else
+    lhs = gimple_call_lhs (stmt);
+  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
   set_vinfo_for_stmt (new_stmt, stmt_info);
   /* For pattern statements make the related statement to point to
      NEW_STMT in order to be able to retrieve the original statement
@@ -1858,7 +2123,8 @@
       for (j = 0; j < ncopies; j++)
 	{
 	  if (j == 0)
-	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
+	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
+			       -1);
 	  else
 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
 
@@ -2063,7 +2329,7 @@
     {
       /* Handle uses.  */
       if (j == 0)
-        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
+        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
       else
         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
 
@@ -2096,6 +2362,42 @@
 }
 
 
+/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
+   either as shift by a scalar or by a vector.  */
+
+bool
+vect_supportable_shift (enum tree_code code, tree scalar_type)
+{
+
+  enum machine_mode vec_mode;
+  optab optab;
+  int icode;
+  tree vectype;
+
+  vectype = get_vectype_for_scalar_type (scalar_type);
+  if (!vectype)
+    return false;
+
+  optab = optab_for_tree_code (code, vectype, optab_scalar);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    {
+      optab = optab_for_tree_code (code, vectype, optab_vector);
+      if (!optab
+          || (optab_handler (optab, TYPE_MODE (vectype))
+                      == CODE_FOR_nothing))
+        return false;
+    }
+
+  vec_mode = TYPE_MODE (vectype);
+  icode = (int) optab_handler (optab, vec_mode);
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  return true;
+}
+
+
 /* Function vectorizable_shift.
 
    Check if STMT performs a shift operation that can be vectorized.
@@ -2382,10 +2684,10 @@
              operand 1 should be of a vector type (the usual case).  */
           if (vec_oprnd1)
             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
-                               slp_node);
+                               slp_node, -1);
           else
             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
-                               slp_node);
+                               slp_node, -1);
         }
       else
         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
@@ -2693,10 +2995,10 @@
 	{
 	  if (op_type == binary_op || op_type == ternary_op)
 	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
-			       slp_node);
+			       slp_node, -1);
 	  else
 	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
-			       slp_node);
+			       slp_node, -1);
 	  if (op_type == ternary_op)
 	    {
 	      vec_oprnds2 = VEC_alloc (tree, heap, 1);
@@ -2887,11 +3189,9 @@
   VEC (tree, heap) *vec_oprnds0 = NULL;
   VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
   tree last_oprnd, intermediate_type;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
 
-  /* FORNOW: not supported by basic block SLP vectorization.  */
-  gcc_assert (loop_vinfo);
-
-  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
 
   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
@@ -2919,7 +3219,7 @@
 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
 	     && CONVERT_EXPR_CODE_P (code))))
     return false;
-  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
+  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
 			     &def_stmt, &def, &dt[0], &vectype_in))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -3010,7 +3310,8 @@
     {
       /* Handle uses.  */
       if (slp_node)
-        vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
+        vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
+			   slp_node, -1);
       else
         {
           VEC_free (tree, heap, vec_oprnds0);
@@ -3166,11 +3467,10 @@
   int multi_step_cvt = 0;
   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
   VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  unsigned int k;
 
-  /* FORNOW: not supported by basic block SLP vectorization.  */
-  gcc_assert (loop_vinfo);
-
-  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
 
   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
@@ -3185,7 +3485,8 @@
 
   code = gimple_assign_rhs_code (stmt);
   if (!CONVERT_EXPR_CODE_P (code)
-      && code != WIDEN_MULT_EXPR)
+      && code != WIDEN_MULT_EXPR
+      && code != WIDEN_LSHIFT_EXPR)
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
@@ -3199,13 +3500,40 @@
 	     && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
 	     && CONVERT_EXPR_CODE_P (code))))
     return false;
-  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
+  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
 			     &def_stmt, &def, &dt[0], &vectype_in))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
 	fprintf (vect_dump, "use not simple.");
       return false;
     }
+
+  op_type = TREE_CODE_LENGTH (code);
+  if (op_type == binary_op)
+    {
+      bool ok;
+
+      op1 = gimple_assign_rhs2 (stmt);
+      if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
+        {
+	  /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
+	     OP1.  */
+          if (CONSTANT_CLASS_P (op0))
+            ok = vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo,
+                             &def_stmt, &def, &dt[1], &vectype_in);
+          else
+            ok = vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt,
+				     &def, &dt[1]);
+
+          if (!ok)
+            {
+	      if (vect_print_dump_info (REPORT_DETAILS))
+	        fprintf (vect_dump, "use not simple.");
+              return false;
+            }
+        }        
+    }
+
   /* If op0 is an external or constant def use a vector type with
      the same size as the output vector type.  */
   if (!vectype_in)
@@ -3238,18 +3566,6 @@
 
   gcc_assert (ncopies >= 1);
 
-  op_type = TREE_CODE_LENGTH (code);
-  if (op_type == binary_op)
-    {
-      op1 = gimple_assign_rhs2 (stmt);
-      if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
-        {
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "use not simple.");
-          return false;
-        }
-    }
-
   /* Supportable by target?  */
   if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
 				       &decl1, &decl2, &code1, &code2,
@@ -3275,6 +3591,14 @@
     fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
                         ncopies);
 
+  if (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR)
+    {
+      if (CONSTANT_CLASS_P (op0))
+	op0 = fold_convert (TREE_TYPE (op1), op0);
+      else if (CONSTANT_CLASS_P (op1))
+	op1 = fold_convert (TREE_TYPE (op0), op1);
+    }
+
   /* Handle def.  */
   /* In case of multi-step promotion, we first generate promotion operations
      to the intermediate types, and then from that types to the final one.
@@ -3308,6 +3632,8 @@
       if (op_type == binary_op)
         vec_oprnds1 = VEC_alloc (tree, heap, 1);
     }
+  else if (code == WIDEN_LSHIFT_EXPR)
+    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -3321,15 +3647,33 @@
       if (j == 0)
         {
           if (slp_node)
-              vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
-                                 &vec_oprnds1, -1);
-          else
+	    {
+	      if (code == WIDEN_LSHIFT_EXPR)
+                {
+                  vec_oprnd1 = op1;
+		  /* Store vec_oprnd1 for every vector stmt to be created
+		     for SLP_NODE.  We check during the analysis that all
+		     the shift arguments are the same.  */
+                  for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
+                    VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
+
+    		  vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
+ 	                             slp_node, -1);
+                }
+              else
+                vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
+                                   &vec_oprnds1, slp_node, -1);
+	    }
+	  else
             {
               vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
               VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
               if (op_type == binary_op)
                 {
-                  vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
+                  if (code == WIDEN_LSHIFT_EXPR)
+                    vec_oprnd1 = op1;
+                  else
+                    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
                   VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                 }
             }
@@ -3340,7 +3684,10 @@
           VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
           if (op_type == binary_op)
             {
-              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
+              if (code == WIDEN_LSHIFT_EXPR)
+                vec_oprnd1 = op1;
+              else
+                vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
               VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
             }
         }
@@ -3385,6 +3732,7 @@
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   enum machine_mode vec_mode;
@@ -3400,6 +3748,7 @@
   int j;
   gimple next_stmt, first_stmt = NULL;
   bool strided_store = false;
+  bool store_lanes_p = false;
   unsigned int group_size, i;
   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
   bool inv_p;
@@ -3407,6 +3756,7 @@
   bool slp = (slp_node != NULL);
   unsigned int vec_num;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  tree aggr_type;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -3460,7 +3810,8 @@
 
   /* The scalar rhs type needs to be trivially convertible to the vector
      component type.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "???  operands of different types");
@@ -3487,9 +3838,14 @@
     {
       strided_store = true;
       first_stmt = DR_GROUP_FIRST_DR (stmt_info);
-      if (!vect_strided_store_supported (vectype)
-	  && !PURE_SLP_STMT (stmt_info) && !slp)
-	return false;
+      if (!slp && !PURE_SLP_STMT (stmt_info))
+	{
+	  group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
+	  if (vect_store_lanes_supported (vectype, group_size))
+	    store_lanes_p = true;
+	  else if (!vect_strided_store_supported (vectype, group_size))
+	    return false;
+	}
 
       if (first_stmt == stmt)
 	{
@@ -3515,7 +3871,7 @@
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
       return true;
     }
 
@@ -3549,6 +3905,7 @@
           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
           first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0); 
           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+	  op = gimple_assign_rhs1 (first_stmt);
         } 
       else
         /* VEC_NUM is the number of vect stmts to be created for this 
@@ -3570,6 +3927,16 @@
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with store-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!store_lanes_p
+	      || alignment_support_scheme == dr_aligned
+	      || alignment_support_scheme == dr_unaligned_supported);
+
+  if (store_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -3621,8 +3988,8 @@
           if (slp)
             {
 	      /* Get vectorized arguments for SLP_NODE.  */
-              vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
-                                 NULL, -1);
+              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
+                                 NULL, slp_node, -1);
 
               vec_oprnd = VEC_index (tree, vec_oprnds, 0);
             }
@@ -3658,9 +4025,9 @@
 	  /* We should have catched mismatched types earlier.  */
 	  gcc_assert (useless_type_conversion_p (vectype,
 						 TREE_TYPE (vec_oprnd)));
-	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
-						  &dummy, &ptr_incr, false,
-						  &inv_p);
+	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
+						  NULL_TREE, &dummy,
+						  &ptr_incr, false, &inv_p);
 	  gcc_assert (bb_vinfo || !inv_p);
 	}
       else
@@ -3681,76 +4048,101 @@
 	      VEC_replace(tree, dr_chain, i, vec_oprnd);
 	      VEC_replace(tree, oprnds, i, vec_oprnd);
 	    }
-	  dataref_ptr =
-		bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+	  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+					 TYPE_SIZE_UNIT (aggr_type));
 	}
 
-      if (strided_store)
+      if (store_lanes_p)
 	{
-	  result_chain = VEC_alloc (tree, heap, group_size);
-	  /* Permute.  */
-	  if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
-					 &result_chain))
-	    return false;
-	}
+	  tree vec_array;
 
-      next_stmt = first_stmt;
-      for (i = 0; i < vec_num; i++)
-	{
-	  struct ptr_info_def *pi;
-
-	  if (i > 0)
-	    /* Bump the vector pointer.  */
-	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-					   NULL_TREE);
-
-	  if (slp)
-	    vec_oprnd = VEC_index (tree, vec_oprnds, i);
-	  else if (strided_store)
-	    /* For strided stores vectorized defs are interleaved in
-	       vect_permute_store_chain().  */
-	    vec_oprnd = VEC_index (tree, result_chain, i);
-
-	  data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
-			     build_int_cst (reference_alias_ptr_type
-					    (DR_REF (first_dr)), 0));
-	  pi = get_ptr_info (dataref_ptr);
-	  pi->align = TYPE_ALIGN_UNIT (vectype);
-          if (aligned_access_p (first_dr))
-	    pi->misalign = 0;
-          else if (DR_MISALIGNMENT (first_dr) == -1)
-	    {
-	      TREE_TYPE (data_ref)
-		= build_aligned_type (TREE_TYPE (data_ref),
-				      TYPE_ALIGN (TREE_TYPE (vectype)));
-	      pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
-	      pi->misalign = 0;
-	    }
-	  else
+	  /* Combine all the vectors into an array.  */
+	  vec_array = create_vector_array (vectype, vec_num);
+	  for (i = 0; i < vec_num; i++)
 	    {
-	      TREE_TYPE (data_ref)
-		= build_aligned_type (TREE_TYPE (data_ref),
-				      TYPE_ALIGN (TREE_TYPE (vectype)));
-	      pi->misalign = DR_MISALIGNMENT (first_dr);
+	      vec_oprnd = VEC_index (tree, dr_chain, i);
+	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
 	    }
 
-	  /* Arguments are ready.  Create the new vector stmt.  */
-	  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+	  /* Emit:
+	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
+	  gimple_call_set_lhs (new_stmt, data_ref);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 	  mark_symbols_for_renaming (new_stmt);
+	}
+      else
+	{
+	  new_stmt = NULL;
+	  if (strided_store)
+	    {
+	      result_chain = VEC_alloc (tree, heap, group_size);
+	      /* Permute.  */
+	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
+					&result_chain);
+	    }
 
-          if (slp)
-            continue;
+	  next_stmt = first_stmt;
+	  for (i = 0; i < vec_num; i++)
+	    {
+	      struct ptr_info_def *pi;
 
-          if (j == 0)
-            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt =  new_stmt;
+	      if (i > 0)
+		/* Bump the vector pointer.  */
+		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+					       stmt, NULL_TREE);
+
+	      if (slp)
+		vec_oprnd = VEC_index (tree, vec_oprnds, i);
+	      else if (strided_store)
+		/* For strided stores vectorized defs are interleaved in
+		   vect_permute_store_chain().  */
+		vec_oprnd = VEC_index (tree, result_chain, i);
+
+	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
+				 build_int_cst (reference_alias_ptr_type
+						(DR_REF (first_dr)), 0));
+	      pi = get_ptr_info (dataref_ptr);
+	      pi->align = TYPE_ALIGN_UNIT (vectype);
+	      if (aligned_access_p (first_dr))
+		pi->misalign = 0;
+	      else if (DR_MISALIGNMENT (first_dr) == -1)
+		{
+		  TREE_TYPE (data_ref)
+		    = build_aligned_type (TREE_TYPE (data_ref),
+					  TYPE_ALIGN (elem_type));
+		  pi->align = TYPE_ALIGN_UNIT (elem_type);
+		  pi->misalign = 0;
+		}
+	      else
+		{
+		  TREE_TYPE (data_ref)
+		    = build_aligned_type (TREE_TYPE (data_ref),
+					  TYPE_ALIGN (elem_type));
+		  pi->misalign = DR_MISALIGNMENT (first_dr);
+		}
+
+	      /* Arguments are ready.  Create the new vector stmt.  */
+	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      mark_symbols_for_renaming (new_stmt);
+
+	      if (slp)
+		continue;
+
+	      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
+	      if (!next_stmt)
+		break;
+	    }
+	}
+      if (!slp)
+	{
+	  if (j == 0)
+	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
 	  else
 	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
-
 	  prev_stmt_info = vinfo_for_stmt (new_stmt);
-	  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
-	  if (!next_stmt)
-	    break;
 	}
     }
 
@@ -3861,6 +4253,7 @@
   bool nested_in_vect_loop = false;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   tree new_temp;
   enum machine_mode mode;
   gimple new_stmt = NULL;
@@ -3877,6 +4270,7 @@
   gimple phi = NULL;
   VEC(tree,heap) *dr_chain = NULL;
   bool strided_load = false;
+  bool load_lanes_p = false;
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
@@ -3889,6 +4283,7 @@
   enum tree_code code;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
+  tree aggr_type;
 
   if (loop_vinfo)
     {
@@ -3965,7 +4360,8 @@
 
   /* The vector component type needs to be trivially convertible to the
      scalar lhs.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "???  operands of different types");
@@ -3979,10 +4375,15 @@
       /* FORNOW */
       gcc_assert (! nested_in_vect_loop);
 
-      /* Check if interleaving is supported.  */
-      if (!vect_strided_load_supported (vectype)
-	  && !PURE_SLP_STMT (stmt_info) && !slp)
-	return false;
+      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+      if (!slp && !PURE_SLP_STMT (stmt_info))
+	{
+	  group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
+	  if (vect_load_lanes_supported (vectype, group_size))
+	    load_lanes_p = true;
+	  else if (!vect_strided_load_supported (vectype, group_size))
+	    return false;
+	}
     }
 
   if (negative)
@@ -4007,18 +4408,23 @@
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-      vect_model_load_cost (stmt_info, ncopies, NULL);
+      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
       return true;
     }
 
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "transform load.");
+    fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
 
   /** Transform.  **/
 
   if (strided_load)
     {
       first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+      if (slp
+          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
+          && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
+        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
+
       /* Check if the chain of loads is already vectorized.  */
       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
 	{
@@ -4038,8 +4444,6 @@
     	}
       else
 	vec_num = group_size;
-
-      dr_chain = VEC_alloc (tree, heap, vec_num);
     }
   else
     {
@@ -4050,6 +4454,11 @@
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with load-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!load_lanes_p
+	      || alignment_support_scheme == dr_aligned
+	      || alignment_support_scheme == dr_unaligned_supported);
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -4181,208 +4590,252 @@
   if (negative)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
+  if (load_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
       /* 1. Create the vector pointer update chain.  */
       if (j == 0)
-        dataref_ptr = vect_create_data_ref_ptr (first_stmt,
+        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type,
 					        at_loop, offset,
 						&dummy, &ptr_incr, false,
 						&inv_p);
       else
-        dataref_ptr =
-		bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+				       TYPE_SIZE_UNIT (aggr_type));
+
+      if (strided_load || slp_perm)
+	dr_chain = VEC_alloc (tree, heap, vec_num);
 
-      for (i = 0; i < vec_num; i++)
+      if (load_lanes_p)
 	{
-	  if (i > 0)
-	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-					   NULL_TREE);
+	  tree vec_array;
 
-	  /* 2. Create the vector-load in the loop.  */
-	  switch (alignment_support_scheme)
-	    {
-	    case dr_aligned:
-	    case dr_unaligned_supported:
-	      {
-		struct ptr_info_def *pi;
-		data_ref
-		  = build2 (MEM_REF, vectype, dataref_ptr,
-			    build_int_cst (reference_alias_ptr_type
-					   (DR_REF (first_dr)), 0));
-		pi = get_ptr_info (dataref_ptr);
-		pi->align = TYPE_ALIGN_UNIT (vectype);
-		if (alignment_support_scheme == dr_aligned)
-		  {
-		    gcc_assert (aligned_access_p (first_dr));
-		    pi->misalign = 0;
-		  }
-		else if (DR_MISALIGNMENT (first_dr) == -1)
-		  {
-		    TREE_TYPE (data_ref)
-		      = build_aligned_type (TREE_TYPE (data_ref),
-					    TYPE_ALIGN (TREE_TYPE (vectype)));
-		    pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
-		    pi->misalign = 0;
-		  }
-		else
-		  {
-		    TREE_TYPE (data_ref)
-		      = build_aligned_type (TREE_TYPE (data_ref),
-					    TYPE_ALIGN (TREE_TYPE (vectype)));
-		    pi->misalign = DR_MISALIGNMENT (first_dr);
-		  }
-		break;
-	      }
-	    case dr_explicit_realign:
-	      {
-		tree ptr, bump;
-		tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+	  vec_array = create_vector_array (vectype, vec_num);
 
-		if (compute_in_loop)
-		  msq = vect_setup_realignment (first_stmt, gsi,
-						&realignment_token,
-						dr_explicit_realign,
-						dataref_ptr, NULL);
-
-		new_stmt = gimple_build_assign_with_ops
-			     (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
-			      build_int_cst
-			        (TREE_TYPE (dataref_ptr),
-				 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-		ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-		gimple_assign_set_lhs (new_stmt, ptr);
-		vect_finish_stmt_generation (stmt, new_stmt, gsi);
-		data_ref
-		  = build2 (MEM_REF, vectype, ptr,
-			    build_int_cst (reference_alias_ptr_type
-					     (DR_REF (first_dr)), 0));
-		vec_dest = vect_create_destination_var (scalar_dest, vectype);
-		new_stmt = gimple_build_assign (vec_dest, data_ref);
-		new_temp = make_ssa_name (vec_dest, new_stmt);
-		gimple_assign_set_lhs (new_stmt, new_temp);
-		gimple_set_vdef (new_stmt, gimple_vdef (stmt));
-		gimple_set_vuse (new_stmt, gimple_vuse (stmt));
-		vect_finish_stmt_generation (stmt, new_stmt, gsi);
-		msq = new_temp;
-
-		bump = size_binop (MULT_EXPR, vs_minus_1,
-				   TYPE_SIZE_UNIT (scalar_type));
-		ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
-		new_stmt = gimple_build_assign_with_ops
-			     (BIT_AND_EXPR, NULL_TREE, ptr,
-			      build_int_cst
-			        (TREE_TYPE (ptr),
-				 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-		ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-		gimple_assign_set_lhs (new_stmt, ptr);
-		vect_finish_stmt_generation (stmt, new_stmt, gsi);
-		data_ref
-		  = build2 (MEM_REF, vectype, ptr,
-			    build_int_cst (reference_alias_ptr_type
-					     (DR_REF (first_dr)), 0));
-	        break;
-	      }
-	    case dr_explicit_realign_optimized:
-	      new_stmt = gimple_build_assign_with_ops
-			   (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
-			    build_int_cst
-			      (TREE_TYPE (dataref_ptr),
-			       -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-	      new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
-	      gimple_assign_set_lhs (new_stmt, new_temp);
-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-	      data_ref
-		= build2 (MEM_REF, vectype, new_temp,
-			  build_int_cst (reference_alias_ptr_type
-					   (DR_REF (first_dr)), 0));
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
-	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
-	  new_stmt = gimple_build_assign (vec_dest, data_ref);
-	  new_temp = make_ssa_name (vec_dest, new_stmt);
-	  gimple_assign_set_lhs (new_stmt, new_temp);
+	  /* Emit:
+	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+	  gimple_call_set_lhs (new_stmt, vec_array);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 	  mark_symbols_for_renaming (new_stmt);
 
-	  /* 3. Handle explicit realignment if necessary/supported.  Create in
-		loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
-	  if (alignment_support_scheme == dr_explicit_realign_optimized
-	      || alignment_support_scheme == dr_explicit_realign)
+	  /* Extract each vector into an SSA_NAME.  */
+	  for (i = 0; i < vec_num; i++)
+	    {
+	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
+					    vec_array, i);
+	      VEC_quick_push (tree, dr_chain, new_temp);
+	    }
+
+	  /* Record the mapping between SSA_NAMEs and statements.  */
+	  vect_record_strided_load_vectors (stmt, dr_chain);
+	}
+      else
+	{
+	  for (i = 0; i < vec_num; i++)
 	    {
-	      tree tmp;
+	      if (i > 0)
+		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+					       stmt, NULL_TREE);
 
-	      lsq = gimple_assign_lhs (new_stmt);
-	      if (!realignment_token)
-		realignment_token = dataref_ptr;
+	      /* 2. Create the vector-load in the loop.  */
+	      switch (alignment_support_scheme)
+		{
+		case dr_aligned:
+		case dr_unaligned_supported:
+		  {
+		    struct ptr_info_def *pi;
+		    data_ref
+		      = build2 (MEM_REF, vectype, dataref_ptr,
+				build_int_cst (reference_alias_ptr_type
+					       (DR_REF (first_dr)), 0));
+		    pi = get_ptr_info (dataref_ptr);
+		    pi->align = TYPE_ALIGN_UNIT (vectype);
+		    if (alignment_support_scheme == dr_aligned)
+		      {
+			gcc_assert (aligned_access_p (first_dr));
+			pi->misalign = 0;
+		      }
+		    else if (DR_MISALIGNMENT (first_dr) == -1)
+		      {
+			TREE_TYPE (data_ref)
+			  = build_aligned_type (TREE_TYPE (data_ref),
+						TYPE_ALIGN (elem_type));
+			pi->align = TYPE_ALIGN_UNIT (elem_type);
+			pi->misalign = 0;
+		      }
+		    else
+		      {
+			TREE_TYPE (data_ref)
+			  = build_aligned_type (TREE_TYPE (data_ref),
+						TYPE_ALIGN (elem_type));
+			pi->misalign = DR_MISALIGNMENT (first_dr);
+		      }
+		    break;
+		  }
+		case dr_explicit_realign:
+		  {
+		    tree ptr, bump;
+		    tree vs_minus_1
+		      = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+
+		    if (compute_in_loop)
+		      msq = vect_setup_realignment (first_stmt, gsi,
+						    &realignment_token,
+						    dr_explicit_realign,
+						    dataref_ptr, NULL);
+
+		    new_stmt = gimple_build_assign_with_ops
+				 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
+				  build_int_cst
+				    (TREE_TYPE (dataref_ptr),
+				     -(HOST_WIDE_INT)
+				     TYPE_ALIGN_UNIT (vectype)));
+		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+		    gimple_assign_set_lhs (new_stmt, ptr);
+		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		    data_ref
+		      = build2 (MEM_REF, vectype, ptr,
+				build_int_cst (reference_alias_ptr_type
+						 (DR_REF (first_dr)), 0));
+		    vec_dest = vect_create_destination_var (scalar_dest,
+							    vectype);
+		    new_stmt = gimple_build_assign (vec_dest, data_ref);
+		    new_temp = make_ssa_name (vec_dest, new_stmt);
+		    gimple_assign_set_lhs (new_stmt, new_temp);
+		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		    msq = new_temp;
+
+		    bump = size_binop (MULT_EXPR, vs_minus_1,
+				       TYPE_SIZE_UNIT (scalar_type));
+		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
+		    new_stmt = gimple_build_assign_with_ops
+				 (BIT_AND_EXPR, NULL_TREE, ptr,
+				  build_int_cst
+				    (TREE_TYPE (ptr),
+				     -(HOST_WIDE_INT)
+				     TYPE_ALIGN_UNIT (vectype)));
+		    ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+		    gimple_assign_set_lhs (new_stmt, ptr);
+		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		    data_ref
+		      = build2 (MEM_REF, vectype, ptr,
+				build_int_cst (reference_alias_ptr_type
+						 (DR_REF (first_dr)), 0));
+		    break;
+		  }
+		case dr_explicit_realign_optimized:
+		  new_stmt = gimple_build_assign_with_ops
+			       (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
+				build_int_cst
+				  (TREE_TYPE (dataref_ptr),
+				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
+		  new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
+					    new_stmt);
+		  gimple_assign_set_lhs (new_stmt, new_temp);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		  data_ref
+		    = build2 (MEM_REF, vectype, new_temp,
+			      build_int_cst (reference_alias_ptr_type
+					       (DR_REF (first_dr)), 0));
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
-	      tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
-			    realignment_token);
-	      new_stmt = gimple_build_assign (vec_dest, tmp);
+	      new_stmt = gimple_build_assign (vec_dest, data_ref);
 	      new_temp = make_ssa_name (vec_dest, new_stmt);
 	      gimple_assign_set_lhs (new_stmt, new_temp);
 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	      mark_symbols_for_renaming (new_stmt);
 
-	      if (alignment_support_scheme == dr_explicit_realign_optimized)
+	      /* 3. Handle explicit realignment if necessary/supported.
+		    Create in loop:
+		       vec_dest = realign_load (msq, lsq, realignment_token)  */
+	      if (alignment_support_scheme == dr_explicit_realign_optimized
+		  || alignment_support_scheme == dr_explicit_realign)
 		{
-		  gcc_assert (phi);
-		  if (i == vec_num - 1 && j == ncopies - 1)
-		    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
-				 UNKNOWN_LOCATION);
-		  msq = lsq;
-		}
-	    }
+		  tree tmp;
 
-	  /* 4. Handle invariant-load.  */
-	  if (inv_p && !bb_vinfo)
-	    {
-	      gcc_assert (!strided_load);
-	      gcc_assert (nested_in_vect_loop_p (loop, stmt));
-	      if (j == 0)
-		{
-		  int k;
-		  tree t = NULL_TREE;
-		  tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
-
-		  /* CHECKME: bitpos depends on endianess?  */
-		  bitpos = bitsize_zero_node;
-		  vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
-				    bitsize, bitpos);
-		  vec_dest =
-			vect_create_destination_var (scalar_dest, NULL_TREE);
-		  new_stmt = gimple_build_assign (vec_dest, vec_inv);
-                  new_temp = make_ssa_name (vec_dest, new_stmt);
+		  lsq = gimple_assign_lhs (new_stmt);
+		  if (!realignment_token)
+		    realignment_token = dataref_ptr;
+		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+		  tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
+				realignment_token);
+		  new_stmt = gimple_build_assign (vec_dest, tmp);
+		  new_temp = make_ssa_name (vec_dest, new_stmt);
 		  gimple_assign_set_lhs (new_stmt, new_temp);
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
-		  for (k = nunits - 1; k >= 0; --k)
-		    t = tree_cons (NULL_TREE, new_temp, t);
-		  /* FIXME: use build_constructor directly.  */
-		  vec_inv = build_constructor_from_list (vectype, t);
-		  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
-		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
+		  if (alignment_support_scheme == dr_explicit_realign_optimized)
+		    {
+		      gcc_assert (phi);
+		      if (i == vec_num - 1 && j == ncopies - 1)
+			add_phi_arg (phi, lsq,
+				     loop_latch_edge (containing_loop),
+				     UNKNOWN_LOCATION);
+		      msq = lsq;
+		    }
 		}
-	      else
-		gcc_unreachable (); /* FORNOW. */
-	    }
 
-	  if (negative)
-	    {
-	      new_temp = reverse_vec_elements (new_temp, stmt, gsi);
-	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
-	    }
+	      /* 4. Handle invariant-load.  */
+	      if (inv_p && !bb_vinfo)
+		{
+		  gcc_assert (!strided_load);
+		  gcc_assert (nested_in_vect_loop_p (loop, stmt));
+		  if (j == 0)
+		    {
+		      int k;
+		      tree t = NULL_TREE;
+		      tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
+
+		      /* CHECKME: bitpos depends on endianess?  */
+		      bitpos = bitsize_zero_node;
+		      vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
+					bitsize, bitpos);
+		      vec_dest = vect_create_destination_var (scalar_dest,
+							      NULL_TREE);
+		      new_stmt = gimple_build_assign (vec_dest, vec_inv);
+		      new_temp = make_ssa_name (vec_dest, new_stmt);
+		      gimple_assign_set_lhs (new_stmt, new_temp);
+		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+		      for (k = nunits - 1; k >= 0; --k)
+			t = tree_cons (NULL_TREE, new_temp, t);
+		      /* FIXME: use build_constructor directly.  */
+		      vec_inv = build_constructor_from_list (vectype, t);
+		      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
+		    }
+		  else
+		    gcc_unreachable (); /* FORNOW. */
+		}
 
-	  /* Collect vector loads and later create their permutation in
-	     vect_transform_strided_load ().  */
-          if (strided_load || slp_perm)
-            VEC_quick_push (tree, dr_chain, new_temp);
+	      if (negative)
+		{
+		  new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
+		}
 
-         /* Store vector loads in the corresponding SLP_NODE.  */
-	  if (slp && !slp_perm)
-	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+	      /* Collect vector loads and later create their permutation in
+		 vect_transform_strided_load ().  */
+	      if (strided_load || slp_perm)
+		VEC_quick_push (tree, dr_chain, new_temp);
+
+	      /* Store vector loads in the corresponding SLP_NODE.  */
+	      if (slp && !slp_perm)
+		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+				new_stmt);
+	    }
 	}
 
       if (slp && !slp_perm)
@@ -4401,12 +4854,9 @@
         {
           if (strided_load)
   	    {
-	      if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
-	        return false;
-
+	      if (!load_lanes_p)
+		vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-              VEC_free (tree, heap, dr_chain);
-	      dr_chain = VEC_alloc (tree, heap, group_size);
 	    }
           else
 	    {
@@ -4417,11 +4867,10 @@
 	      prev_stmt_info = vinfo_for_stmt (new_stmt);
 	    }
         }
+      if (dr_chain)
+	VEC_free (tree, heap, dr_chain);
     }
 
-  if (dr_chain)
-    VEC_free (tree, heap, dr_chain);
-
   return true;
 }
 
@@ -4435,7 +4884,7 @@
    condition operands are supportable using vec_is_simple_use.  */
 
 static bool
-vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
+vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
   tree lhs, rhs;
   tree def;
@@ -4450,7 +4899,7 @@
   if (TREE_CODE (lhs) == SSA_NAME)
     {
       gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
-      if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
+      if (!vect_is_simple_use (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
                                &dt))
 	return false;
     }
@@ -4461,7 +4910,7 @@
   if (TREE_CODE (rhs) == SSA_NAME)
     {
       gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
-      if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
+      if (!vect_is_simple_use (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
                                &dt))
 	return false;
     }
@@ -4487,7 +4936,8 @@
 
 bool
 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
-			gimple *vec_stmt, tree reduc_def, int reduc_index)
+			gimple *vec_stmt, tree reduc_def, int reduc_index,
+			slp_tree slp_node)
 {
   tree scalar_dest = NULL_TREE;
   tree vec_dest = NULL_TREE;
@@ -4504,19 +4954,24 @@
   tree def;
   enum vect_def_type dt, dts[4];
   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
-  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+  int ncopies;
   enum tree_code code;
   stmt_vec_info prev_stmt_info = NULL;
-  int j;
+  int i, j;
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
+  VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
 
-  /* FORNOW: unsupported in basic block SLP.  */
-  gcc_assert (loop_vinfo);
+  if (slp_node || PURE_SLP_STMT (stmt_info))
+    ncopies = 1;
+  else
+    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
 
   gcc_assert (ncopies >= 1);
-  if (reduc_index && ncopies > 1)
+  if (reduc_index && (ncopies > 1 || STMT_SLP_TYPE (stmt_info)))
     return false; /* FORNOW */
 
-  if (!STMT_VINFO_RELEVANT_P (stmt_info))
+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) 
     return false;
 
   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
@@ -4524,10 +4979,6 @@
            && reduc_def))
     return false;
 
-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* FORNOW: not yet supported.  */
   if (STMT_VINFO_LIVE_P (stmt_info))
     {
@@ -4551,7 +5002,7 @@
   then_clause = TREE_OPERAND (op, 1);
   else_clause = TREE_OPERAND (op, 2);
 
-  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
+  if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo))
     return false;
 
   /* We do not handle two different vector types for the condition
@@ -4563,7 +5014,7 @@
   if (TREE_CODE (then_clause) == SSA_NAME)
     {
       gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
-      if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
+      if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
 			       &then_def_stmt, &def, &dt))
 	return false;
     }
@@ -4575,7 +5026,7 @@
   if (TREE_CODE (else_clause) == SSA_NAME)
     {
       gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
-      if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
+      if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
 			       &else_def_stmt, &def, &dt))
 	return false;
     }
@@ -4593,7 +5044,15 @@
       return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
     }
 
-  /* Transform */
+  /* Transform.  */
+
+  if (!slp_node)
+    {
+      vec_oprnds0 = VEC_alloc (tree, heap, 1);
+      vec_oprnds1 = VEC_alloc (tree, heap, 1);
+      vec_oprnds2 = VEC_alloc (tree, heap, 1);
+      vec_oprnds3 = VEC_alloc (tree, heap, 1);
+    }
 
   /* Handle def.  */
   scalar_dest = gimple_assign_lhs (stmt);
@@ -4602,67 +5061,118 @@
   /* Handle cond expr.  */
   for (j = 0; j < ncopies; j++)
     {
-      gimple new_stmt;
+      gimple new_stmt = NULL;
       if (j == 0)
 	{
-	  gimple gtemp;
-	  vec_cond_lhs =
+          if (slp_node)
+            {
+              VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
+              VEC (slp_void_p, heap) *vec_defs;
+
+	      vec_defs = VEC_alloc (slp_void_p, heap, 4);
+              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
+              VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
+              VEC_safe_push (tree, heap, ops, then_clause);
+              VEC_safe_push (tree, heap, ops, else_clause);
+              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
+              vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
+              vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
+              vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
+              vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
+
+              VEC_free (tree, heap, ops);
+              VEC_free (slp_void_p, heap, vec_defs);
+            }
+          else
+            {
+	      gimple gtemp;
+	      vec_cond_lhs =
 	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
 					    stmt, NULL);
-	  vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
+	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
 			      NULL, &gtemp, &def, &dts[0]);
-	  vec_cond_rhs =
-	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
-					    stmt, NULL);
-	  vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
-			      NULL, &gtemp, &def, &dts[1]);
-	  if (reduc_index == 1)
-	    vec_then_clause = reduc_def;
-	  else
-	    {
-	      vec_then_clause = vect_get_vec_def_for_operand (then_clause,
-							      stmt, NULL);
-	      vect_is_simple_use (then_clause, loop_vinfo,
-				  NULL, &gtemp, &def, &dts[2]);
-	    }
-	  if (reduc_index == 2)
-	    vec_else_clause = reduc_def;
-	  else
-	    {
-	      vec_else_clause = vect_get_vec_def_for_operand (else_clause,
+
+	      vec_cond_rhs =
+		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
+						stmt, NULL);
+	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
+					NULL, &gtemp, &def, &dts[1]);
+	      if (reduc_index == 1)
+		vec_then_clause = reduc_def;
+	      else
+		{
+		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
+		 		  			      stmt, NULL);
+	          vect_is_simple_use (then_clause, loop_vinfo,
+					  NULL, &gtemp, &def, &dts[2]);
+		}
+	      if (reduc_index == 2)
+		vec_else_clause = reduc_def;
+	      else
+		{
+		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
 							      stmt, NULL);
-	      vect_is_simple_use (else_clause, loop_vinfo,
+		  vect_is_simple_use (else_clause, loop_vinfo,
 				  NULL, &gtemp, &def, &dts[3]);
+		}
 	    }
 	}
       else
 	{
-	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
-	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
+	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
+						VEC_pop (tree, vec_oprnds0));
+	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
+						VEC_pop (tree, vec_oprnds1));
 	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
-							    vec_then_clause);
+						VEC_pop (tree, vec_oprnds2));
 	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
-							    vec_else_clause);
+						VEC_pop (tree, vec_oprnds3));
+	}
+
+      if (!slp_node)
+        {
+	  VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
+	  VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
+	  VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
+	  VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
 	}
 
       /* Arguments are ready.  Create the new vector stmt.  */
-      vec_compare = build2 (TREE_CODE (cond_expr), vectype,
-			    vec_cond_lhs, vec_cond_rhs);
-      vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
-			      vec_compare, vec_then_clause, vec_else_clause);
+      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
+        {
+          vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
+          vec_then_clause = VEC_index (tree, vec_oprnds2, i);
+          vec_else_clause = VEC_index (tree, vec_oprnds3, i);
+
+          vec_compare = build2 (TREE_CODE (cond_expr), vectype,
+  			       vec_cond_lhs, vec_cond_rhs);
+          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
+ 		         vec_compare, vec_then_clause, vec_else_clause);
 
-      new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
-      new_temp = make_ssa_name (vec_dest, new_stmt);
-      gimple_assign_set_lhs (new_stmt, new_temp);
-      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-      if (j == 0)
-        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
-      else
-        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
+          new_temp = make_ssa_name (vec_dest, new_stmt);
+          gimple_assign_set_lhs (new_stmt, new_temp);
+          vect_finish_stmt_generation (stmt, new_stmt, gsi);
+          if (slp_node)
+            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+        }
 
-      prev_stmt_info = vinfo_for_stmt (new_stmt);
+        if (slp_node)
+          continue;
+
+        if (j == 0)
+          STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+        else
+          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
+        prev_stmt_info = vinfo_for_stmt (new_stmt);
     }
 
+  VEC_free (tree, heap, vec_oprnds0);
+  VEC_free (tree, heap, vec_oprnds1);
+  VEC_free (tree, heap, vec_oprnds2);
+  VEC_free (tree, heap, vec_oprnds3);
+
   return true;
 }
 
@@ -4677,6 +5187,7 @@
   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
   bool ok;
   tree scalar_type, vectype;
+  gimple pattern_stmt, pattern_def_stmt;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     {
@@ -4698,16 +5209,70 @@
      - any LABEL_EXPRs in the loop
      - computations that are used only for array indexing or loop control.
      In basic blocks we only analyze statements that are a part of some SLP
-     instance, therefore, all the statements are relevant.  */
+     instance, therefore, all the statements are relevant.  
+
+     Pattern statement needs to be analyzed instead of the original statement
+     if the original statement is not relevant. Otherwise, we analyze both
+     statements.  */
 
+  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
   if (!STMT_VINFO_RELEVANT_P (stmt_info)
       && !STMT_VINFO_LIVE_P (stmt_info))
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "irrelevant.");
+      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+          && pattern_stmt
+          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+        {
+          /* Analyze PATTERN_STMT instead of the original stmt.  */
+          stmt = pattern_stmt;
+          stmt_info = vinfo_for_stmt (pattern_stmt);
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "==> examining pattern statement: ");
+              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+            }
+        }
+      else
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "irrelevant.");
 
-      return true;
+          return true;
+        }
     }
+  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+           && pattern_stmt
+           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
+    {
+      /* Analyze PATTERN_STMT too.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "==> examining pattern statement: ");
+          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+        }
+
+      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
+        return false;
+   }
+
+  if (is_pattern_stmt_p (stmt_info)
+      && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
+      && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
+          || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
+    {
+      /* Analyze def stmt of STMT if it's a pattern stmt.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+          fprintf (vect_dump, "==> examining pattern def statement: ");
+          print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
+        }
+
+      if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
+        return false;
+   }
+
 
   switch (STMT_VINFO_DEF_TYPE (stmt_info))
     {
@@ -4781,15 +5346,18 @@
             || vectorizable_call (stmt, NULL, NULL)
             || vectorizable_store (stmt, NULL, NULL, NULL)
             || vectorizable_reduction (stmt, NULL, NULL, NULL)
-            || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
+            || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
     else
       {
         if (bb_vinfo)
-          ok = (vectorizable_shift (stmt, NULL, NULL, node)
+          ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
+                || vectorizable_type_demotion (stmt, NULL, NULL, node)
+               || vectorizable_shift (stmt, NULL, NULL, node)
                 || vectorizable_operation (stmt, NULL, NULL, node)
                 || vectorizable_assignment (stmt, NULL, NULL, node)
                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
-                || vectorizable_store (stmt, NULL, NULL, node));
+                || vectorizable_store (stmt, NULL, NULL, node)
+                || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
       }
 
   if (!ok)
@@ -4825,27 +5393,6 @@
        return false;
     }
 
-  if (!PURE_SLP_STMT (stmt_info))
-    {
-      /* Groups of strided accesses whose size is not a power of 2 are not
-         vectorizable yet using loop-vectorization.  Therefore, if this stmt
-	 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
-	 loop-based vectorized), the loop cannot be vectorized.  */
-      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
-          && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
-                                        DR_GROUP_FIRST_DR (stmt_info)))) == -1)
-        {
-          if (vect_print_dump_info (REPORT_DETAILS))
-            {
-              fprintf (vect_dump, "not vectorized: the size of group "
-                                  "of strided accesses is not a power of 2");
-              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
-            }
-
-          return false;
-        }
-    }
-
   return true;
 }
 
@@ -4862,7 +5409,6 @@
   bool is_store = false;
   gimple vec_stmt = NULL;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  gimple orig_stmt_in_pattern;
   bool done;
 
   switch (STMT_VINFO_TYPE (stmt_info))
@@ -4927,8 +5473,7 @@
       break;
 
     case condition_vec_info_type:
-      gcc_assert (!slp_node);
-      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
+      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
       gcc_assert (done);
       break;
 
@@ -5001,21 +5546,7 @@
     }
 
   if (vec_stmt)
-    {
-      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
-      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
-      if (orig_stmt_in_pattern)
-	{
-	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
-	  /* STMT was inserted by the vectorizer to replace a computation idiom.
-	     ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
-	     computed this idiom.  We need to record a pointer to VEC_STMT in
-	     the stmt_info of ORIG_STMT_IN_PATTERN.  See more details in the
-	     documentation of vect_pattern_recog.  */
-	  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
-	    STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
-	}
-    }
+    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
 
   return is_store;
 }
@@ -5065,6 +5596,7 @@
   STMT_VINFO_VECTORIZABLE (res) = true;
   STMT_VINFO_IN_PATTERN_P (res) = false;
   STMT_VINFO_RELATED_STMT (res) = NULL;
+  STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
   STMT_VINFO_DATA_REF (res) = NULL;
 
   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
@@ -5402,8 +5934,12 @@
       || *dt == vect_nested_cycle)
     {
       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
-      if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+
+      if (STMT_VINFO_IN_PATTERN_P (stmt_info) 
+          && !STMT_VINFO_RELEVANT (stmt_info)
+          && !STMT_VINFO_LIVE_P (stmt_info))
 	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
+
       *vectype = STMT_VINFO_VECTYPE (stmt_info);
       gcc_assert (*vectype != NULL_TREE);
     }
@@ -5452,7 +5988,7 @@
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
-  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
+  struct loop *vect_loop = NULL;
   bool ordered_p;
   enum machine_mode vec_mode;
   enum insn_code icode1, icode2;
@@ -5461,6 +5997,9 @@
   tree wide_vectype = vectype_out;
   enum tree_code c1, c2;
 
+  if (loop_info)
+    vect_loop = LOOP_VINFO_LOOP (loop_info);
+
   /* The result of a vectorized widening operation usually requires two vectors
      (because the widened results do not fit int one vector). The generated
      vector results would normally be expected to be generated in the same
@@ -5481,7 +6020,8 @@
      iterations in parallel).  We therefore don't allow to change the order
      of the computation in the inner-loop during outer-loop vectorization.  */
 
-   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+   if (vect_loop
+       && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
        && !nested_in_vect_loop_p (vect_loop, stmt))
      ordered_p = false;
    else
@@ -5518,6 +6058,19 @@
         }
       break;
 
+    case WIDEN_LSHIFT_EXPR:
+      if (BYTES_BIG_ENDIAN)
+        {
+          c1 = VEC_WIDEN_LSHIFT_HI_EXPR;
+          c2 = VEC_WIDEN_LSHIFT_LO_EXPR;
+        }
+      else
+        {
+          c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
+          c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
+        }
+      break;
+
     CASE_CONVERT:
       if (BYTES_BIG_ENDIAN)
         {
--- a/src/gcc/value-prof.c
+++ b/src/gcc/value-prof.c
@@ -1252,6 +1252,9 @@
   if (TREE_CODE (callee) == FUNCTION_DECL)
     return false;
 
+  if (gimple_call_internal_p (stmt))
+    return false;
+
   histogram = gimple_histogram_value_of_type (cfun, stmt, HIST_TYPE_INDIR_CALL);
   if (!histogram)
     return false;
@@ -1640,6 +1643,7 @@
   tree callee;
 
   if (gimple_code (stmt) != GIMPLE_CALL
+      || gimple_call_internal_p (stmt)
       || gimple_call_fndecl (stmt) != NULL_TREE)
     return;
 
--- a/src/libstdc++-v3/libsupc++/eh_arm.cc
+++ b/src/libstdc++-v3/libsupc++/eh_arm.cc
@@ -30,10 +30,11 @@
 using namespace __cxxabiv1;
 
 
-// Given the thrown type THROW_TYPE, pointer to a variable containing a
-// pointer to the exception object THROWN_PTR_P and a type CATCH_TYPE to
-// compare against, return whether or not there is a match and if so,
-// update *THROWN_PTR_P.
+// Given the thrown type THROW_TYPE, exception object UE_HEADER and a
+// type CATCH_TYPE to compare against, return whether or not there is
+// a match and if so, update *THROWN_PTR_P to point to either the
+// type-matched object, or in the case of a pointer type, the object
+// pointed to by the pointer.
 
 extern "C" __cxa_type_match_result
 __cxa_type_match(_Unwind_Exception* ue_header,
@@ -41,51 +42,51 @@
 		 bool is_reference __attribute__((__unused__)),
 		 void** thrown_ptr_p)
 {
-  bool forced_unwind = __is_gxx_forced_unwind_class(ue_header->exception_class);
-  bool foreign_exception = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class);
-  bool dependent_exception =
-    __is_dependent_exception(ue_header->exception_class);
+  bool forced_unwind
+    = __is_gxx_forced_unwind_class(ue_header->exception_class);
+  bool foreign_exception
+    = !forced_unwind && !__is_gxx_exception_class(ue_header->exception_class);
+  bool dependent_exception
+    = __is_dependent_exception(ue_header->exception_class);
   __cxa_exception* xh = __get_exception_header_from_ue(ue_header);
   __cxa_dependent_exception *dx = __get_dependent_exception_from_ue(ue_header);
   const std::type_info* throw_type;
+  void *thrown_ptr = 0;
 
   if (forced_unwind)
     throw_type = &typeid(abi::__forced_unwind);
   else if (foreign_exception)
     throw_type = &typeid(abi::__foreign_exception);
-  else if (dependent_exception)
-    throw_type = __get_exception_header_from_obj
-      (dx->primaryException)->exceptionType;
   else
-    throw_type = xh->exceptionType;
-
-  void* thrown_ptr = *thrown_ptr_p;
+    {
+      if (dependent_exception)
+	xh = __get_exception_header_from_obj (dx->primaryException);
+      throw_type = xh->exceptionType;
+      // We used to require the caller set the target of thrown_ptr_p,
+      // but that's incorrect -- the EHABI makes no such requirement
+      // -- and not all callers will set it.  Fortunately callers that
+      // do initialize will always pass us the value we calculate
+      // here, so there's no backwards compatibility problem.
+      thrown_ptr = __get_object_from_ue (ue_header);
+    }
+  
+  __cxa_type_match_result result = ctm_succeeded;
 
   // Pointer types need to adjust the actual pointer, not
   // the pointer to pointer that is the exception object.
   // This also has the effect of passing pointer types
   // "by value" through the __cxa_begin_catch return value.
   if (throw_type->__is_pointer_p())
-    thrown_ptr = *(void**) thrown_ptr;
+    {
+      thrown_ptr = *(void**) thrown_ptr;
+      // We need to indicate the indirection to our caller.
+      result = ctm_succeeded_with_ptr_to_base;
+    }
 
   if (catch_type->__do_catch(throw_type, &thrown_ptr, 1))
     {
       *thrown_ptr_p = thrown_ptr;
-
-      if (typeid(*catch_type) == typeid (typeid(void*)))
-	{
-	  const __pointer_type_info *catch_pointer_type =
-	    static_cast<const __pointer_type_info *> (catch_type);
-	  const __pointer_type_info *throw_pointer_type =
-	    static_cast<const __pointer_type_info *> (throw_type);
-
-	  if (typeid (*catch_pointer_type->__pointee) != typeid (void)
-	      && (*catch_pointer_type->__pointee != 
-		  *throw_pointer_type->__pointee))
-	    return ctm_succeeded_with_ptr_to_base;
-	}
-
-      return ctm_succeeded;
+      return result;
     }
 
   return ctm_failed;
gcc-4.6-source 4.6.4-6ubuntu2 / usr / src / gcc-4.6 / debian / patches / gcc-linaro.diff