Description: More patches from IBM for PPC.
Author: William Seurer <seurer@us.ibm.com>
Reviewed-By: Adam Conrad <adconrad@ubuntu.com>

Index: mono-3.2.8+dfsg/mono/mini/mini-ppc.c
===================================================================
--- mono-3.2.8+dfsg.orig/mono/mini/mini-ppc.c	2014-05-03 16:42:55.000000000 -0600
+++ mono-3.2.8+dfsg/mono/mini/mini-ppc.c	2014-05-03 16:44:33.687695174 -0600
@@ -860,18 +860,19 @@
 	RegTypeBase,
 	RegTypeFP,
 	RegTypeStructByVal,
-	RegTypeStructByAddr
+   RegTypeStructByAddr,
+   RegTypeFPStructByVal,  // For the v2 ABI, floats should be passed in FRs instead of GRs.  Only valid for ABI v2!
 };
 
 typedef struct {
 	gint32  offset;
 	guint32 vtsize; /* in param area */
 	guint8  reg;
-	guint8  vtregs; /* number of registers used to pass a RegTypeStructByVal */
+   guint8  vtregs; /* number of registers used to pass a RegTypeStructByVal/RegTypeFPStructByVal */
 	guint8  regtype : 4; /* 0 general, 1 basereg, 2 floating point register, see RegType* */
-	guint8  size    : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal */
+   guint8  size    : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal/RegTypeFPStructByVal */
 	guint8  bytes   : 4; /* size in bytes - only valid for
-				RegTypeStructByVal if the struct fits
+            RegTypeStructByVal/RegTypeFPStructByVal if the struct fits
 				in one word, otherwise it's 0*/
 } ArgInfo;
 
@@ -888,6 +889,31 @@
 
 #define DEBUG(a)
 
+
+// Return TRUE when klass has at least one instance field and every instance field is a non-byref
+// float (R4) or double (R8) whose type equals that of the first declared field (which may itself
+// be static); static fields are otherwise skipped.  A NULL klass or no instance fields gives FALSE.
+static gboolean
+is_homogeneous_float_aggregate (MonoClass *klass)
+{
+	MonoClassField *field, *first = NULL;
+	gboolean has_a_field = FALSE;
+	gpointer iter = NULL;
+	while (klass && (field = mono_class_get_fields (klass, &iter))) {
+		// Take the reference field from the iterator rather than reading klass->fields up front.
+		// NOTE(review): klass->fields is presumably only populated once the iterator has run -- confirm.
+		if (!first)
+			first = field;
+		if (field->type->attrs & FIELD_ATTRIBUTE_STATIC)
+			continue;	// statics do not disqualify the aggregate
+		if (field->type->byref || !mono_metadata_type_equal (first->type, field->type) ||
+		    (field->type->type != MONO_TYPE_R4 && field->type->type != MONO_TYPE_R8))
+			return FALSE;
+		has_a_field = TRUE;
+	}
+	return has_a_field;
+}
+
+
 static void inline
 add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple)
 {
@@ -1107,6 +1133,34 @@
 				int rest = PPC_LAST_ARG_REG - gr + 1;
 				int n_in_regs;
 
+#if PPC_PASS_SMALL_FLOAT_STRUCTS_IN_FR_REGS
+            // Pass small (<= 8 member) structures entirely made up of either float or double members
+            // in FR registers.  There have to be at least 8 registers left.
+	    if (is_homogeneous_float_aggregate(klass) &&
+                (rest >= 8)) {
+		int memberSize = 0;
+		MonoClassField *firstMember = &klass->fields [0];
+		g_assert (firstMember);
+		// The class is homogeneous so the first member will be the same as all the members
+		if (firstMember->type->type == MONO_TYPE_R4)
+			memberSize = 4;
+		else if (firstMember->type->type == MONO_TYPE_R8)
+			memberSize = 8;
+		else
+			g_assert_not_reached ();
+			nregs = size / memberSize;
+//		g_assert ((size % membersize == 0));
+		n_in_regs = MIN (rest, nregs);
+		cinfo->args [n].regtype = RegTypeFPStructByVal;
+		cinfo->args [n].vtregs = n_in_regs;
+		cinfo->args [n].size = memberSize;
+		cinfo->args [n].vtsize = nregs - n_in_regs;
+		cinfo->args [n].reg = fr;
+		fr += n_in_regs;
+		FP_ALSO_IN_REG (gr += n_in_regs);  // WDS: Check if this is correct
+	} else
+#endif
+            {
 				align_size += (sizeof (gpointer) - 1);
 				align_size &= ~(sizeof (gpointer) - 1);
 				nregs = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer);
@@ -1123,6 +1177,8 @@
 				cinfo->args [n].size = n_in_regs;
 				cinfo->args [n].vtsize = nregs - n_in_regs;
 				cinfo->args [n].reg = gr;
+            	    gr += n_in_regs;
+		}
 
 #ifdef __mono_ppc64__
 				if (nregs == 1 && is_pinvoke)
@@ -1130,7 +1186,6 @@
 				else
 #endif
 					cinfo->args [n].bytes = 0;
-				gr += n_in_regs;
 				cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size;
 				/*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/
 				stack_size += nregs * sizeof (gpointer);
@@ -1153,7 +1208,12 @@
 			cinfo->args [n].size = 4;
 
 			/* It was 7, now it is 8 in LinuxPPC */
-			if (fr <= PPC_LAST_FPARG_REG) {
+         if (fr <= PPC_LAST_FPARG_REG
+#if (_CALL_ELF == 2)
+            // For non-native vararg calls the parms must go in storage
+             && !(!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
+#endif
+            ) {
 				cinfo->args [n].regtype = RegTypeFP;
 				cinfo->args [n].reg = fr;
 				fr ++;
@@ -1170,7 +1230,12 @@
 		case MONO_TYPE_R8:
 			cinfo->args [n].size = 8;
 			/* It was 7, now it is 8 in LinuxPPC */
-			if (fr <= PPC_LAST_FPARG_REG) {
+         if (fr <= PPC_LAST_FPARG_REG
+#if (_CALL_ELF == 2)
+             // For non-native vararg calls the parms must go in storage
+             && !(!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
+#endif
+             ) {
 				cinfo->args [n].regtype = RegTypeFP;
 				cinfo->args [n].reg = fr;
 				fr ++;
@@ -1585,6 +1650,17 @@
 			ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
 			memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
 			MONO_ADD_INS (cfg->cbb, ins);
+      } else if (ainfo->regtype == RegTypeFPStructByVal) {
+         /* this is further handled in mono_arch_emit_outarg_vt () */
+         MONO_INST_NEW (cfg, ins, OP_OUTARG_VT);
+         ins->opcode = OP_OUTARG_VT;
+         ins->sreg1 = in->dreg;
+         ins->klass = in->klass;
+         ins->inst_p0 = call;
+         ins->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
+         memcpy (ins->inst_p1, ainfo, sizeof (ArgInfo));
+         MONO_ADD_INS (cfg->cbb, ins);
+         cfg->flags |= MONO_CFG_HAS_FPOUT;
 		} else if (ainfo->regtype == RegTypeBase) {
 			if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) {
 				MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, ppc_r1, ainfo->offset, in->dreg);
@@ -1688,20 +1764,43 @@
 		} else
 #endif
 			for (i = 0; i < ainfo->vtregs; ++i) {
-				int antipadding = 0;
-				if (ainfo->bytes) {
-					g_assert (i == 0);
-					antipadding = sizeof (gpointer) - ainfo->bytes;
-				}
+//          int antipadding = 0;
+//          if (ainfo->bytes) {
+//             g_assert (i == 0);
+//             antipadding = sizeof (gpointer) - ainfo->bytes;
+//          }
+//          dreg = mono_alloc_ireg (cfg);
+//          MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
+//          if (antipadding)
+//             MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, dreg, dreg, antipadding * 8);
 				dreg = mono_alloc_ireg (cfg);
+            if (ainfo->bytes && mono_class_native_size (ins->klass, NULL) == 1) {
+               MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, dreg, src->dreg, soffset);
+            } else if (ainfo->bytes && mono_class_native_size (ins->klass, NULL) == 2) {
+              MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU2_MEMBASE, dreg, src->dreg, soffset);
+            } else if (ainfo->bytes && mono_class_native_size (ins->klass, NULL) == 4) { // WDS -- Maybe <= 4?
+              MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU4_MEMBASE, dreg, src->dreg, soffset);
+            } else {
 				MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset);
-				if (antipadding)
-					MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, dreg, dreg, antipadding * 8);
+            }
 				mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
 				soffset += sizeof (gpointer);
 			}
 		if (ovf_size != 0)
 			mini_emit_memcpy (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+   } else if (ainfo->regtype == RegTypeFPStructByVal) {
+      soffset = 0;
+      for (i = 0; i < ainfo->vtregs; ++i) {
+         dreg = mono_alloc_freg (cfg);
+      	 if (ainfo->size == 4)
+      		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR4_MEMBASE, dreg, src->dreg, soffset);
+      	 else // ==8
+       		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, dreg, src->dreg, soffset);
+         mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE);
+         soffset += ainfo->size;
+      }
+      if (ovf_size != 0)
+         mini_emit_memcpy (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
 	} else if (ainfo->regtype == RegTypeFP) {
 		int tmpr = mono_alloc_freg (cfg);
 		if (ainfo->size == 4)
@@ -2140,11 +2239,8 @@
 {
 	switch (ins->opcode) {
 	case OP_ICONV_TO_R_UN: {
-#if G_BYTE_ORDER == G_BIG_ENDIAN
+      // This value is OK as-is for both big and little endian because of how it is stored
 		static const guint64 adjust_val = 0x4330000000000000ULL;
-#else
-		static const guint64 adjust_val = 0x0000000000003043ULL;
-#endif
 		int msw_reg = mono_alloc_ireg (cfg);
 		int adj_reg = mono_alloc_freg (cfg);
 		int tmp_reg = mono_alloc_freg (cfg);
@@ -2155,8 +2251,14 @@
 			basereg = mono_alloc_ireg (cfg);
 			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
 		}
+#if G_BYTE_ORDER == G_BIG_ENDIAN
 		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg);
 		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, ins->sreg1);
+#else
+      // For little endian the words are reversed
+      MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, msw_reg);
+      MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, ins->sreg1);
+#endif
 		MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, &adjust_val);
 		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset);
 		MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg);
@@ -2206,7 +2308,11 @@
 			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset);
 		}
 		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, basereg, offset, ins->sreg1);
+#if G_BYTE_ORDER == G_BIG_ENDIAN
 		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI4_MEMBASE, msw_reg, basereg, offset);
+#else
+      MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI4_MEMBASE, msw_reg, basereg, offset+4);
+#endif
 		MONO_EMIT_NEW_UNALU (cfg, OP_CHECK_FINITE, -1, msw_reg);
 		MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, ins->dreg, ins->sreg1);
 		ins->opcode = OP_NOP;
@@ -3840,6 +3946,16 @@
 			ppc_ldptr (code, ppc_r2, 8, ins->sreg1);
 			ppc_mtlr (code, ppc_r0);
 #else
+#if (_CALL_ELF == 2)
+         if (ins->flags & MONO_INST_HAS_METHOD) {
+           // Not a global entry point
+         } else {
+             // Need to set up r12 with function entry address for global entry point
+             if (ppc_r12 != ins->sreg1) {
+                ppc_mr(code,ppc_r12,ins->sreg1);
+             }
+         }
+#endif
 			ppc_mtlr (code, ins->sreg1);
 #endif
 			ppc_blrl (code);
@@ -4971,6 +5087,41 @@
 					ppc_stfs (code, ainfo->reg, inst->inst_offset, inst->inst_basereg);
 				else
 					g_assert_not_reached ();
+          } else if (ainfo->regtype == RegTypeFPStructByVal) {
+            int doffset = inst->inst_offset;
+            int soffset = 0;
+            int cur_reg;
+            int size = 0;
+            g_assert (ppc_is_imm16 (inst->inst_offset));
+            g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->vtregs * sizeof (gpointer)));
+            /* FIXME: what if there is no class? */
+            if (sig->pinvoke && mono_class_from_mono_type (inst->inst_vtype))
+               size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL);
+            for (cur_reg = 0; cur_reg < ainfo->vtregs; ++cur_reg) {
+               if (ainfo->size == 4) {
+                  ppc_stfs (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+               } else {
+                  ppc_stfd (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+               }
+               soffset += ainfo->size;
+               doffset += ainfo->size;
+            }
+//            if (ainfo->vtsize) {
+//               /* FIXME: we need to do the shifting here, too */
+//               if (ainfo->bytes)
+//                  NOT_IMPLEMENTED;
+//               /* load the previous stack pointer in r12 (r0 gets overwritten by the memcpy) */
+//               ppc_ldr (code, ppc_r12, 0, ppc_sp);
+//               if ((size & MONO_PPC_32_64_CASE (3, 7)) != 0) {
+//                  code = emit_memcpy (code, size - soffset,
+//                     inst->inst_basereg, doffset,
+//                     ppc_r12, ainfo->offset + soffset);
+//               } else {
+//                  code = emit_memcpy (code, ainfo->vtsize * sizeof (gpointer),
+//                     inst->inst_basereg, doffset,
+//                     ppc_r12, ainfo->offset + soffset);
+//               }
+//            }
 			} else if (ainfo->regtype == RegTypeStructByVal) {
 				int doffset = inst->inst_offset;
 				int soffset = 0;
@@ -5000,9 +5151,18 @@
 #ifdef __mono_ppc64__
 						if (ainfo->bytes) {
 							g_assert (cur_reg == 0);
-							ppc_sldi (code, ppc_r0, ainfo->reg,
-									(sizeof (gpointer) - ainfo->bytes) * 8);
-							ppc_stptr (code, ppc_r0, doffset, inst->inst_basereg);
+//                   ppc_sldi (code, ppc_r0, ainfo->reg,
+//                         (sizeof (gpointer) - ainfo->bytes) * 8);
+//                   ppc_stptr (code, ppc_r0, doffset, inst->inst_basereg);
+                     if (mono_class_native_size (inst->klass, NULL) == 1) {
+                       ppc_stb (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                     } else if (mono_class_native_size (inst->klass, NULL) == 2) {
+                        ppc_sth (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                     } else if (mono_class_native_size (inst->klass, NULL) == 4) {  // WDS -- maybe <=4?
+                        ppc_stw (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);
+                     } else {
+                        ppc_stptr (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg);  // WDS -- Better way?
+                     }
 						} else
 #endif
 						{
Index: mono-3.2.8+dfsg/mono/mini/mini-ppc.h
===================================================================
--- mono-3.2.8+dfsg.orig/mono/mini/mini-ppc.h	2014-05-03 16:42:55.000000000 -0600
+++ mono-3.2.8+dfsg/mono/mini/mini-ppc.h	2014-05-03 16:42:55.000000000 -0600
@@ -20,7 +20,7 @@
 
 #define MONO_ARCH_FRAME_ALIGNMENT 16
 
-/* fixme: align to 16byte instead of 32byte (we align to 32byte to get 
+/* fixme: align to 16byte instead of 32byte (we align to 32byte to get
  * reproduceable results for benchmarks */
 #define MONO_ARCH_CODE_ALIGNMENT 32
 
@@ -56,9 +56,9 @@
  * - for variables which contain values of registers, use mgreg_t.
  * - for loading/saving pointers/ints, use the normal ppc_load_reg/ppc_save_reg ()
  *   macros.
- * - for loading/saving register sized quantities, use the ppc_ldr/ppc_str 
+ * - for loading/saving register sized quantities, use the ppc_ldr/ppc_str
  *   macros.
- * - make sure to not mix the two kinds of macros for the same memory location, 
+ * - make sure to not mix the two kinds of macros for the same memory location,
  *   since ppc is big endian, so a 8 byte store followed by a 4 byte load will
  *   load the upper 32 bit of the value.
  * - use OP_LOADR_MEMBASE/OP_STORER_MEMBASE to load/store register sized
@@ -146,16 +146,24 @@
 #define PPC_FIRST_FPARG_REG ppc_f1
 #define PPC_LAST_FPARG_REG ppc_f13
 #define PPC_PASS_STRUCTS_BY_VALUE 1
+#define PPC_PASS_SMALL_FLOAT_STRUCTS_IN_FR_REGS 0
 #else
 /* Linux */
 #ifdef __mono_ppc64__
 #define PPC_RET_ADDR_OFFSET 16
-#define PPC_STACK_PARAM_OFFSET 48
-#define PPC_MINIMAL_STACK_SIZE 48
+ // Power LE ABI v2
+ #if (_CALL_ELF == 2)
+  #define PPC_STACK_PARAM_OFFSET 32
+  #define PPC_MINIMAL_STACK_SIZE 32
+  #define PPC_PASS_SMALL_FLOAT_STRUCTS_IN_FR_REGS 1
+ #else
+  #define PPC_MINIMAL_STACK_SIZE 48
+  #define PPC_STACK_PARAM_OFFSET 48
+  #define PPC_PASS_SMALL_FLOAT_STRUCTS_IN_FR_REGS 0
+ #endif
 #define PPC_MINIMAL_PARAM_AREA_SIZE 64
 #define PPC_LAST_FPARG_REG ppc_f13
 #define PPC_PASS_STRUCTS_BY_VALUE 1
-#define PPC_SMALL_RET_STRUCT_IN_REG 0
 #define PPC_THREAD_PTR_REG ppc_r13
 #else
 #define PPC_RET_ADDR_OFFSET 4
@@ -164,7 +172,7 @@
 #define PPC_MINIMAL_PARAM_AREA_SIZE 0
 #define PPC_LAST_FPARG_REG ppc_f8
 #define PPC_PASS_STRUCTS_BY_VALUE 0
-#define PPC_SMALL_RET_STRUCT_IN_REG 1
+#define PPC_PASS_SMALL_FLOAT_STRUCTS_IN_FR_REGS 0
 #define PPC_THREAD_PTR_REG ppc_r2
 #endif
 #define PPC_FIRST_ARG_REG ppc_r3
Index: mono-3.2.8+dfsg/mono/mini/cpu-ppc64.md
===================================================================
--- mono-3.2.8+dfsg.orig/mono/mini/cpu-ppc64.md 2014-05-03 16:42:55.000000000 -0600
+++ mono-3.2.8+dfsg/mono/mini/cpu-ppc64.md      2014-05-03 16:42:55.000000000 -0600
@@ -45,6 +45,8 @@
 #
 # See the code in mini-x86.c for more details on how the specifiers are used.
 #
+# WDS verify the 120 max length
+tailcall: len:120 clob:c
 memory_barrier: len:4
 nop: len:4
 relaxed_nop: len:4
