// Branch compiled only when __ghs__ is defined.
// Loads six f32x2 values from rhs (byte offsets 0/16/32 and 8/24/40), then, for
// each of the four row offsets 0..3 * ROW, reads two f32x2 values from lhs,
// accumulates them into f9 and f10, and stores both results through `this`.
// NOTE(review): f7, f8, f9, f10 and ROW are used below but their declarations are
// not among the visible lines (the leading numbers jump from 175 to 180) - confirm
// them in the full file.
// NOTE(review): the intrinsic names suggest paired-single load/store (__PSQ_LX /
// __PSQ_STX) and multiply / multiply-add by lane 0 or lane 1 of the second operand
// (__PS_MULS0, __PS_MADDS0, __PS_MADDS1) - confirm against the compiler reference.
167#if defined( __ghs__ )
// rhs data at offsets 0, 16, 32 (first 8 bytes of each 16-byte step).
170 f32x2 f0 = __PSQ_LX(&rhs, 0, 0, 0);
171 f32x2 f1 = __PSQ_LX(&rhs, 16, 0, 0);
172 f32x2 f2 = __PSQ_LX(&rhs, 32, 0, 0);
// rhs data at offsets 8, 24, 40 (second 8 bytes of each 16-byte step).
173 f32x2 f4 = __PSQ_LX(&rhs, 8, 0, 0);
174 f32x2 f5 = __PSQ_LX(&rhs, 24, 0, 0);
175 f32x2 f6 = __PSQ_LX(&rhs, 40, 0, 0);
// Row offset 0 * ROW: f9 accumulates three terms (f0, f1, f2); f10 accumulates
// four (f4, f5, f6, f7).
// NOTE(review): presumably f7 supplies the extra, unscaled lhs term for the last
// column - confirm where f7 is initialised.
180 f8 = __PSQ_LX(&lhs, 0 * ROW + 0, 0, 0);
181 f9 = __PS_MULS0(f0, f8);
182 f10 = __PS_MULS0(f4, f8);
183 f9 = __PS_MADDS1(f1, f8, f9);
184 f10 = __PS_MADDS1(f5, f8, f10);
// f8 is reloaded with the second half of the same lhs row.
185 f8 = __PSQ_LX(&lhs, 0 * ROW + 8, 0, 0);
186 f9 = __PS_MADDS0(f2, f8, f9);
187 f10 = __PS_MADDS0(f6, f8, f10);
188 f10 = __PS_MADDS1(f7, f8, f10);
189 __PSQ_STX(
this, 0 * ROW + 0, f9, 0, 0);
190 __PSQ_STX(
this, 0 * ROW + 8, f10, 0, 0);
// Row offset 1 * ROW: same sequence as above.
192 f8 = __PSQ_LX(&lhs, 1 * ROW + 0, 0, 0);
193 f9 = __PS_MULS0(f0, f8);
194 f10 = __PS_MULS0(f4, f8);
195 f9 = __PS_MADDS1(f1, f8, f9);
196 f10 = __PS_MADDS1(f5, f8, f10);
197 f8 = __PSQ_LX(&lhs, 1 * ROW + 8, 0, 0);
198 f9 = __PS_MADDS0(f2, f8, f9);
199 f10 = __PS_MADDS0(f6, f8, f10);
200 f10 = __PS_MADDS1(f7, f8, f10);
201 __PSQ_STX(
this, 1 * ROW + 0, f9, 0, 0);
202 __PSQ_STX(
this, 1 * ROW + 8, f10, 0, 0);
// Row offset 2 * ROW: same sequence as above.
204 f8 = __PSQ_LX(&lhs, 2 * ROW + 0, 0, 0);
205 f9 = __PS_MULS0(f0, f8);
206 f10 = __PS_MULS0(f4, f8);
207 f9 = __PS_MADDS1(f1, f8, f9);
208 f10 = __PS_MADDS1(f5, f8, f10);
209 f8 = __PSQ_LX(&lhs, 2 * ROW + 8, 0, 0);
210 f9 = __PS_MADDS0(f2, f8, f9);
211 f10 = __PS_MADDS0(f6, f8, f10);
212 f10 = __PS_MADDS1(f7, f8, f10);
213 __PSQ_STX(
this, 2 * ROW + 0, f9, 0, 0);
214 __PSQ_STX(
this, 2 * ROW + 8, f10, 0, 0);
// Row offset 3 * ROW: same sequence as above.
216 f8 = __PSQ_LX(&lhs, 3 * ROW + 0, 0, 0);
217 f9 = __PS_MULS0(f0, f8);
218 f10 = __PS_MULS0(f4, f8);
219 f9 = __PS_MADDS1(f1, f8, f9);
220 f10 = __PS_MADDS1(f5, f8, f10);
221 f8 = __PSQ_LX(&lhs, 3 * ROW + 8, 0, 0);
222 f9 = __PS_MADDS0(f2, f8, f9);
223 f10 = __PS_MADDS0(f6, f8, f10);
224 f10 = __PS_MADDS1(f7, f8, f10);
225 __PSQ_STX(
this, 3 * ROW + 0, f9, 0, 0);
226 __PSQ_STX(
this, 3 * ROW + 8, f10, 0, 0);
// Scalar formulas for all sixteen elements of `out`.
// Each out.mIJ sums lhs.mI0..lhs.mI2 multiplied by rhs.m0J..rhs.m2J; only rhs
// rows 0..2 are read. Column 3 additionally adds lhs.mI3 unscaled, which is the
// same as multiplying by an rhs whose fourth row is (0, 0, 0, 1).
// Row 0 of out.
231 out.m00 = lhs.m00 * rhs.m00 + lhs.m01 * rhs.m10 + lhs.m02 * rhs.m20;
232 out.m01 = lhs.m00 * rhs.m01 + lhs.m01 * rhs.m11 + lhs.m02 * rhs.m21;
233 out.m02 = lhs.m00 * rhs.m02 + lhs.m01 * rhs.m12 + lhs.m02 * rhs.m22;
234 out.m03 = lhs.m00 * rhs.m03 + lhs.m01 * rhs.m13 + lhs.m02 * rhs.m23 + lhs.m03;
// Row 1 of out.
235 out.m10 = lhs.m10 * rhs.m00 + lhs.m11 * rhs.m10 + lhs.m12 * rhs.m20;
236 out.m11 = lhs.m10 * rhs.m01 + lhs.m11 * rhs.m11 + lhs.m12 * rhs.m21;
237 out.m12 = lhs.m10 * rhs.m02 + lhs.m11 * rhs.m12 + lhs.m12 * rhs.m22;
238 out.m13 = lhs.m10 * rhs.m03 + lhs.m11 * rhs.m13 + lhs.m12 * rhs.m23 + lhs.m13;
// Row 2 of out.
239 out.m20 = lhs.m20 * rhs.m00 + lhs.m21 * rhs.m10 + lhs.m22 * rhs.m20;
240 out.m21 = lhs.m20 * rhs.m01 + lhs.m21 * rhs.m11 + lhs.m22 * rhs.m21;
241 out.m22 = lhs.m20 * rhs.m02 + lhs.m21 * rhs.m12 + lhs.m22 * rhs.m22;
242 out.m23 = lhs.m20 * rhs.m03 + lhs.m21 * rhs.m13 + lhs.m22 * rhs.m23 + lhs.m23;
// Row 3 of out.
243 out.m30 = lhs.m30 * rhs.m00 + lhs.m31 * rhs.m10 + lhs.m32 * rhs.m20;
244 out.m31 = lhs.m30 * rhs.m01 + lhs.m31 * rhs.m11 + lhs.m32 * rhs.m21;
245 out.m32 = lhs.m30 * rhs.m02 + lhs.m31 * rhs.m12 + lhs.m32 * rhs.m22;
246 out.m33 = lhs.m30 * rhs.m03 + lhs.m31 * rhs.m13 + lhs.m32 * rhs.m23 + lhs.m33;
// Branch compiled only when __ghs__ is defined.
// Loads eight f32x2 values from rhs (byte offsets 0..56 in steps of 8), computes
// three output rows from lhs row offsets 0..2 * ROW with four accumulated terms
// per register, and finally stores f3/f7 (the rhs data from offsets 48 and 56)
// unchanged at row offset 3 * ROW of `this`.
// NOTE(review): f8, f9, f10 and ROW are declared outside the visible lines (the
// leading numbers jump from 265 to 269) - confirm them in the full file.
// NOTE(review): the intrinsic names suggest paired-single load/store and
// multiply / multiply-add by lane 0 or lane 1 of the second operand - confirm
// against the compiler reference.
255#if defined( __ghs__ )
// rhs data at offsets 0, 16, 32, 48 (first 8 bytes of each 16-byte step).
258 f32x2 f0 = __PSQ_LX(&rhs, 0, 0, 0);
259 f32x2 f1 = __PSQ_LX(&rhs, 16, 0, 0);
260 f32x2 f2 = __PSQ_LX(&rhs, 32, 0, 0);
261 f32x2 f3 = __PSQ_LX(&rhs, 48, 0, 0);
// rhs data at offsets 8, 24, 40, 56 (second 8 bytes of each 16-byte step).
262 f32x2 f4 = __PSQ_LX(&rhs, 8, 0, 0);
263 f32x2 f5 = __PSQ_LX(&rhs, 24, 0, 0);
264 f32x2 f6 = __PSQ_LX(&rhs, 40, 0, 0);
265 f32x2 f7 = __PSQ_LX(&rhs, 56, 0, 0);
// Row offset 0 * ROW: f9 accumulates f0..f3 terms, f10 accumulates f4..f7 terms.
269 f8 = __PSQ_LX(&lhs, 0 * ROW + 0, 0, 0);
270 f9 = __PS_MULS0(f0, f8);
271 f10 = __PS_MULS0(f4, f8);
272 f9 = __PS_MADDS1(f1, f8, f9);
273 f10 = __PS_MADDS1(f5, f8, f10);
// f8 is reloaded with the second half of the same lhs row.
274 f8 = __PSQ_LX(&lhs, 0 * ROW + 8, 0, 0);
275 f9 = __PS_MADDS0(f2, f8, f9);
276 f10 = __PS_MADDS0(f6, f8, f10);
277 f9 = __PS_MADDS1(f3, f8, f9);
278 f10 = __PS_MADDS1(f7, f8, f10);
279 __PSQ_STX(
this, 0 * ROW + 0, f9, 0, 0);
280 __PSQ_STX(
this, 0 * ROW + 8, f10, 0, 0);
// Row offset 1 * ROW: same sequence as above.
282 f8 = __PSQ_LX(&lhs, 1 * ROW + 0, 0, 0);
283 f9 = __PS_MULS0(f0, f8);
284 f10 = __PS_MULS0(f4, f8);
285 f9 = __PS_MADDS1(f1, f8, f9);
286 f10 = __PS_MADDS1(f5, f8, f10);
287 f8 = __PSQ_LX(&lhs, 1 * ROW + 8, 0, 0);
288 f9 = __PS_MADDS0(f2, f8, f9);
289 f10 = __PS_MADDS0(f6, f8, f10);
290 f9 = __PS_MADDS1(f3, f8, f9);
291 f10 = __PS_MADDS1(f7, f8, f10);
292 __PSQ_STX(
this, 1 * ROW + 0, f9, 0, 0);
293 __PSQ_STX(
this, 1 * ROW + 8, f10, 0, 0);
// Row offset 2 * ROW: same sequence as above.
295 f8 = __PSQ_LX(&lhs, 2 * ROW + 0, 0, 0);
296 f9 = __PS_MULS0(f0, f8);
297 f10 = __PS_MULS0(f4, f8);
298 f9 = __PS_MADDS1(f1, f8, f9);
299 f10 = __PS_MADDS1(f5, f8, f10);
300 f8 = __PSQ_LX(&lhs, 2 * ROW + 8, 0, 0);
301 f9 = __PS_MADDS0(f2, f8, f9);
302 f10 = __PS_MADDS0(f6, f8, f10);
303 f9 = __PS_MADDS1(f3, f8, f9);
304 f10 = __PS_MADDS1(f7, f8, f10);
305 __PSQ_STX(
this, 2 * ROW + 0, f9, 0, 0);
306 __PSQ_STX(
this, 2 * ROW + 8, f10, 0, 0);
// Row offset 3 * ROW: no lhs data is read; the values loaded from rhs offsets 48
// and 56 are stored as-is.
308 __PSQ_STX(
this, 3 * ROW + 0, f3, 0, 0);
309 __PSQ_STX(
this, 3 * ROW + 8, f7, 0, 0);
// Scalar formulas for rows 0..2 of `out`: each out.mIJ is the full four-term
// product of lhs row I (mI0..mI3) with rhs column J (m0J..m3J).
// NOTE(review): no assignment to out.m30..out.m33 appears among the visible
// lines (the leading numbers jump from 325 to 362) - confirm how row 3 is filled.
// Row 0 of out.
314 out.m00 = lhs.m00 * rhs.m00 + lhs.m01 * rhs.m10 + lhs.m02 * rhs.m20 + lhs.m03 * rhs.m30;
315 out.m01 = lhs.m00 * rhs.m01 + lhs.m01 * rhs.m11 + lhs.m02 * rhs.m21 + lhs.m03 * rhs.m31;
316 out.m02 = lhs.m00 * rhs.m02 + lhs.m01 * rhs.m12 + lhs.m02 * rhs.m22 + lhs.m03 * rhs.m32;
317 out.m03 = lhs.m00 * rhs.m03 + lhs.m01 * rhs.m13 + lhs.m02 * rhs.m23 + lhs.m03 * rhs.m33;
// Row 1 of out.
318 out.m10 = lhs.m10 * rhs.m00 + lhs.m11 * rhs.m10 + lhs.m12 * rhs.m20 + lhs.m13 * rhs.m30;
319 out.m11 = lhs.m10 * rhs.m01 + lhs.m11 * rhs.m11 + lhs.m12 * rhs.m21 + lhs.m13 * rhs.m31;
320 out.m12 = lhs.m10 * rhs.m02 + lhs.m11 * rhs.m12 + lhs.m12 * rhs.m22 + lhs.m13 * rhs.m32;
321 out.m13 = lhs.m10 * rhs.m03 + lhs.m11 * rhs.m13 + lhs.m12 * rhs.m23 + lhs.m13 * rhs.m33;
// Row 2 of out.
322 out.m20 = lhs.m20 * rhs.m00 + lhs.m21 * rhs.m10 + lhs.m22 * rhs.m20 + lhs.m23 * rhs.m30;
323 out.m21 = lhs.m20 * rhs.m01 + lhs.m21 * rhs.m11 + lhs.m22 * rhs.m21 + lhs.m23 * rhs.m31;
324 out.m22 = lhs.m20 * rhs.m02 + lhs.m21 * rhs.m12 + lhs.m22 * rhs.m22 + lhs.m23 * rhs.m32;
325 out.m23 = lhs.m20 * rhs.m03 + lhs.m21 * rhs.m13 + lhs.m22 * rhs.m23 + lhs.m23 * rhs.m33;
// Sixteen signed 3x3 determinants of `m`. out.mIJ is built only from elements of
// m outside row J and column I (e.g. out.m01 reads rows 0, 2, 3 and columns
// 1, 2, 3), with the sign alternating in a checkerboard pattern. This is the
// transposed cofactor (adjugate) layout.
// NOTE(review): no division by a determinant is visible in these lines - confirm
// whether scaling happens elsewhere in the full file.
// Row 0 of out: minors that exclude column 0 of m.
362 out.m00 = m.m11 * m.m22 * m.m33 + m.m12 * m.m23 * m.m31 + m.m13 * m.m21 * m.m32 - m.m11 * m.m23 * m.m32 - m.m12 * m.m21 * m.m33 - m.m13 * m.m22 * m.m31;
363 out.m01 = m.m01 * m.m23 * m.m32 + m.m02 * m.m21 * m.m33 + m.m03 * m.m22 * m.m31 - m.m01 * m.m22 * m.m33 - m.m02 * m.m23 * m.m31 - m.m03 * m.m21 * m.m32;
364 out.m02 = m.m01 * m.m12 * m.m33 + m.m02 * m.m13 * m.m31 + m.m03 * m.m11 * m.m32 - m.m01 * m.m13 * m.m32 - m.m02 * m.m11 * m.m33 - m.m03 * m.m12 * m.m31;
365 out.m03 = m.m01 * m.m13 * m.m22 + m.m02 * m.m11 * m.m23 + m.m03 * m.m12 * m.m21 - m.m01 * m.m12 * m.m23 - m.m02 * m.m13 * m.m21 - m.m03 * m.m11 * m.m22;
// Row 1 of out: minors that exclude column 1 of m.
366 out.m10 = m.m10 * m.m23 * m.m32 + m.m12 * m.m20 * m.m33 + m.m13 * m.m22 * m.m30 - m.m10 * m.m22 * m.m33 - m.m12 * m.m23 * m.m30 - m.m13 * m.m20 * m.m32;
367 out.m11 = m.m00 * m.m22 * m.m33 + m.m02 * m.m23 * m.m30 + m.m03 * m.m20 * m.m32 - m.m00 * m.m23 * m.m32 - m.m02 * m.m20 * m.m33 - m.m03 * m.m22 * m.m30;
368 out.m12 = m.m00 * m.m13 * m.m32 + m.m02 * m.m10 * m.m33 + m.m03 * m.m12 * m.m30 - m.m00 * m.m12 * m.m33 - m.m02 * m.m13 * m.m30 - m.m03 * m.m10 * m.m32;
369 out.m13 = m.m00 * m.m12 * m.m23 + m.m02 * m.m13 * m.m20 + m.m03 * m.m10 * m.m22 - m.m00 * m.m13 * m.m22 - m.m02 * m.m10 * m.m23 - m.m03 * m.m12 * m.m20;
// Row 2 of out: minors that exclude column 2 of m.
370 out.m20 = m.m10 * m.m21 * m.m33 + m.m11 * m.m23 * m.m30 + m.m13 * m.m20 * m.m31 - m.m10 * m.m23 * m.m31 - m.m11 * m.m20 * m.m33 - m.m13 * m.m21 * m.m30;
371 out.m21 = m.m00 * m.m23 * m.m31 + m.m01 * m.m20 * m.m33 + m.m03 * m.m21 * m.m30 - m.m00 * m.m21 * m.m33 - m.m01 * m.m23 * m.m30 - m.m03 * m.m20 * m.m31;
372 out.m22 = m.m00 * m.m11 * m.m33 + m.m01 * m.m13 * m.m30 + m.m03 * m.m10 * m.m31 - m.m00 * m.m13 * m.m31 - m.m01 * m.m10 * m.m33 - m.m03 * m.m11 * m.m30;
373 out.m23 = m.m00 * m.m13 * m.m21 + m.m01 * m.m10 * m.m23 + m.m03 * m.m11 * m.m20 - m.m00 * m.m11 * m.m23 - m.m01 * m.m13 * m.m20 - m.m03 * m.m10 * m.m21;
// Row 3 of out: minors that exclude column 3 of m.
374 out.m30 = m.m10 * m.m22 * m.m31 + m.m11 * m.m20 * m.m32 + m.m12 * m.m21 * m.m30 - m.m10 * m.m21 * m.m32 - m.m11 * m.m22 * m.m30 - m.m12 * m.m20 * m.m31;
375 out.m31 = m.m00 * m.m21 * m.m32 + m.m01 * m.m22 * m.m30 + m.m02 * m.m20 * m.m31 - m.m00 * m.m22 * m.m31 - m.m01 * m.m20 * m.m32 - m.m02 * m.m21 * m.m30;
376 out.m32 = m.m00 * m.m12 * m.m31 + m.m01 * m.m10 * m.m32 + m.m02 * m.m11 * m.m30 - m.m00 * m.m11 * m.m32 - m.m01 * m.m12 * m.m30 - m.m02 * m.m10 * m.m31;
377 out.m33 = m.m00 * m.m11 * m.m22 + m.m01 * m.m12 * m.m20 + m.m02 * m.m10 * m.m21 - m.m00 * m.m12 * m.m21 - m.m01 * m.m10 * m.m22 - m.m02 * m.m11 * m.m20;