资源简介
高斯消去法(LU分解)并行算法:设计实现SSE算法,加速计算过程。包括代码以及说明文档。
代码片段和文件信息
// SSE_LU.cpp : 定义控制台应用程序的入口点。
//
#include “stdafx.h“
#include
#include
#include
#include
#include
#include
#include
#include
#define N 4096
using namespace std;
// Serial Gaussian elimination, performed in place on the N x N matrix A
// (an array of N row pointers, each row holding N floats).
//
// For each pivot index the pivot row is divided by its diagonal entry from
// the diagonal to the end of the row, then the scaled pivot row is subtracted
// from every row below it and the entry under the pivot is set to zero.
// No row exchange is performed and the diagonal entry is not checked, so a
// zero pivot is divided by as-is.
//
// Returns the same pointer that was passed in.
float** serial(float **A)
{
    for (int pivot = 0; pivot < N; ++pivot)
    {
        float *pivotRow = A[pivot];

        // Normalise the pivot row; the divisor is captured first because the
        // diagonal entry itself is overwritten by the first division.
        const float diag = pivotRow[pivot];
        for (int col = pivot; col < N; ++col)
        {
            pivotRow[col] = pivotRow[col] / diag;
        }

        // Eliminate the pivot column from every row underneath.
        for (int row = pivot + 1; row < N; ++row)
        {
            float *target = A[row];
            const float factor = target[pivot];
            for (int col = pivot + 1; col < N; ++col)
            {
                target[col] = target[col] - factor * pivotRow[col];
            }
            target[pivot] = 0;
        }
    }
    return A;
}
// Divides row[from .. n-1] by `divisor`, four floats at a time where possible.
// Columns before the next multiple of four and any leftover columns at the
// end are handled one element at a time.
static void sse_scale_row(float *row, int from, int n, float divisor)
{
    const __m128 divisorVec = _mm_set1_ps(divisor);
    int j = from;
    for (; j < n && (j & 3) != 0; j++)          // scalar head up to a multiple of four
    {
        row[j] = row[j] / divisor;
    }
    for (; j + 4 <= n; j += 4)                  // 4-wide blocks
    {
        // Unaligned load/store: nothing visible here guarantees row alignment.
        _mm_storeu_ps(row + j, _mm_div_ps(_mm_loadu_ps(row + j), divisorVec));
    }
    for (; j < n; j++)                          // scalar tail when n is not a multiple of four
    {
        row[j] = row[j] / divisor;
    }
}

// Computes dst[j] -= factor * src[j] for j in [from, n), using the same
// head / 4-wide block / tail split as sse_scale_row.
static void sse_subtract_scaled_row(float *dst, const float *src, int from, int n, float factor)
{
    const __m128 factorVec = _mm_set1_ps(factor);
    int j = from;
    for (; j < n && (j & 3) != 0; j++)
    {
        dst[j] = dst[j] - factor * src[j];
    }
    for (; j + 4 <= n; j += 4)
    {
        const __m128 current = _mm_loadu_ps(dst + j);
        const __m128 pivotPart = _mm_loadu_ps(src + j);
        _mm_storeu_ps(dst + j, _mm_sub_ps(current, _mm_mul_ps(factorVec, pivotPart)));
    }
    for (; j < n; j++)
    {
        dst[j] = dst[j] - factor * src[j];
    }
}

// SSE version of the Gaussian elimination, performed in place on the N x N
// matrix A (an array of N row pointers, each row holding N floats).
//
// Per pivot index k: the pivot row is divided by A[k][k] from column k to the
// end, then for every row i below, A[i][k] times the pivot row is subtracted
// from columns k+1 .. N-1 and A[i][k] is set to zero. No row exchange is
// performed and the pivot is not checked, so a zero pivot is divided by as-is.
//
// Every column in [k, N) is touched exactly once per step for any N, including
// values of N that are not a multiple of four.
void parallel(float **A)
{
    for (int k = 0; k < N; k++)
    {
        float *pivotRow = A[k];

        // Capture the pivot before scaling: the scaling overwrites A[k][k].
        const float pivot = pivotRow[k];
        sse_scale_row(pivotRow, k, N, pivot);

        for (int i = k + 1; i < N; i++)
        {
            float *row = A[i];
            const float factor = row[k];
            sse_subtract_scaled_row(row, pivotRow, k + 1, N, factor);
            row[k] = 0;
        }
    }
}
// Prints the top-left num x num corner of matrix A to standard output.
// Each value is followed by a single space and each row ends with a newline
// (the stream is flushed after every row).
//
// A   : array of row pointers; rows 0 .. num-1 must each hold at least num floats.
// num : number of rows and columns to print.
void print(float **A, int num)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            std::cout << A[i][j] << " ";
        }
        std::cout << std::endl;
    }
}
// Program entry point.
//
// Builds a random N x N matrix, keeps an independent copy of it, runs the
// serial elimination on the first and the SSE elimination on the second,
// prints the top-left 10 x 10 corner after each run and reports how long
// each run took.
int _tmain()
{
    srand(static_cast<unsigned>(time(NULL)));   // seed the generator

    // Two matrices with separate row storage, so the SSE run starts from the
    // same input as the serial run rather than from its already reduced output.
    float **A = new float*[N];
    float **A2 = new float*[N];
    for (int i = 0; i < N; i++)
    {
        A[i] = new float[N];
        A2[i] = new float[N];
    }

    std::cout << "*************生成初始随机矩阵************" << std::endl;
    for (int i = 0; i < N; i++)
    {
        for (int j = 0; j < N; j++)
        {
            // Values in 1..90; starting at 1 keeps zeros out of the initial matrix.
            A[i][j] = static_cast<float>(rand() % 90 + 1);
            A2[i][j] = A[i][j];
        }
    }
    print(A, 10);

    std::cout << "***************串行的高斯**************" << std::endl;
    clock_t clockBegin, clockEnd;
    clockBegin = clock();                       // start timing
    serial(A);
    clockEnd = clock();
    print(A, 10);
    // clock() counts ticks; scale by CLOCKS_PER_SEC so the figure really is milliseconds.
    std::cout << "总共耗时: " << static_cast<long>((clockEnd - clockBegin) * 1000.0 / CLOCKS_PER_SEC) << "ms" << std::endl;

    std::cout << "***************并行的高斯**************" << std::endl;
    clockBegin = clock();                       // start timing
    parallel(A2);
    clockEnd = clock();
    print(A2, 10);
    std::cout << "总共耗时: " << static_cast<long>((clockEnd - clockBegin) * 1000.0 / CLOCKS_PER_SEC) << "ms" << std::endl;

    // Release both matrices.
    for (int i = 0; i < N; i++)
    {
        delete[] A[i];
        delete[] A2[i];
    }
    delete[] A;
    delete[] A2;
    return 0;
}
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 156783 2016-07-19 14:34 高斯消去法SSE并行化.docx
文件 74752 2016-03-29 17:25 SSE_LU\Debug\SSE_LU.exe
文件 708952 2016-03-29 17:25 SSE_LU\Debug\SSE_LU.ilk
文件 1298432 2016-03-29 17:25 SSE_LU\Debug\SSE_LU.pdb
文件 3080192 2016-03-29 17:25 SSE_LU\ipch\sse_lu-5967a1ae\sse_lu-b3fb2d97.ipch
文件 1735 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.log
文件 165993 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.obj
文件 1703936 2016-03-29 15:11 SSE_LU\SSE_LU\Debug\SSE_LU.pch
文件 1374 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\cl.command.1.tlog
文件 30640 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\CL.read.1.tlog
文件 910 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\CL.write.1.tlog
文件 1208 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\link.command.1.tlog
文件 2738 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\link.read.1.tlog
文件 480 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\link.write.1.tlog
文件 188 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\SSE_LU.lastbuildstate
文件 11920 2016-03-29 15:11 SSE_LU\SSE_LU\Debug\stdafx.obj
文件 764928 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\vc120.idb
文件 487424 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\vc120.pdb
文件 1503 2016-03-29 14:05 SSE_LU\SSE_LU\ReadMe.txt
文件 3110 2016-03-29 17:25 SSE_LU\SSE_LU\SSE_LU.cpp
文件 4531 2016-03-29 14:05 SSE_LU\SSE_LU\SSE_LU.vcxproj
文件 1313 2016-03-29 14:05 SSE_LU\SSE_LU\SSE_LU.vcxproj.filters
文件 212 2016-03-29 14:05 SSE_LU\SSE_LU\stdafx.cpp
文件 234 2016-03-29 14:05 SSE_LU\SSE_LU\stdafx.h
文件 236 2016-03-29 14:05 SSE_LU\SSE_LU\targetver.h
文件 33619968 2016-03-29 17:34 SSE_LU\SSE_LU.sdf
文件 964 2016-03-29 14:05 SSE_LU\SSE_LU.sln
..A..H. 13824 2016-03-29 17:34 SSE_LU\SSE_LU.v12.suo
目录 0 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog
目录 0 2016-03-29 14:05 SSE_LU\ipch\sse_lu-5967a1ae
............此处省略8个文件信息
相关资源
- Essential .net(.net 本质论) 中文版
- tesseract-ocr- 的字母数字识别
- Automotive Development Processes
- IBMx3250M5_5458U盘安装WindowsServer2012R2说明
- Modeling Business Processes: A Petri Net-Orien
- PSSE 34.1.1 图形界面用户指南
- 《Unity Animation Essentials》《Unity游戏动
- Practical Foundations of Windows Debugging Dis
- tesseract_lib_vs2010编译好的dll库和lib文件
- PSSEM-2000S 电力监控系统网络安全监测
- 习题解答:Probability Statistics and Rand
- Probability Statistics and Random Processes fo
- Essentials of Stochastic Processes
- Operating System Concepts Essentials 2nd Editi
- tesserocr-2.4.0-cp37-cp37m-win32.whl
- Essential Linux Device Drivers + 源码
- Blind contrast enhancement assessment by gradi
- Samuel Karlin Taylor H.M. A second course in S
- tesseract环境
- tesseract-ocr2500常用字宋体字库
- Probability Statistics and Random Processes fo
- Essentials of Computer Architecture 2nd Editio
- SVA_ The Power of Assertions in SystemVerilog
- RabbitMQ Essentials
- 秋无痕一键优化WindowsServer2008(64位)
- ODOO_12_DEVELOPMENT_ESSENTIALS_FOURTH_EDITION.
- Probability & Random Processes for Electrical
- Essential Linux Device Drivers.pdf
- IP Messenger飞鸽传书2019最新源码.zip
- 汇编语言实现二进制,十进制,十六
评论
共有 条评论