资源简介
高斯消去法(LU分解)并行算法:设计实现SSE算法,加速计算过程。包括代码以及说明文档。
代码片段和文件信息
// SSE_LU.cpp : 定义控制台应用程序的入口点。
//
#include “stdafx.h“
#include
#include
#include
#include
#include
#include
#include
#include
#define N 4096
using namespace std;
// Serial Gaussian elimination, performed in place on the N x N matrix A.
//
// For every pivot index the pivot row is divided by its diagonal entry
// (columns pivot..N-1), then each row below has the scaled pivot row
// subtracted from it, weighted by that row's entry in the pivot column,
// and the entry in the pivot column is set to zero.
//
// No row exchange is performed and the diagonal entry is not checked
// against zero before it is used as a divisor.
//
// A: array of N row pointers, each addressing N floats; modified in place.
// Returns the same pointer that was passed in.
float** serial(float **A)
{
    for (int pivot = 0; pivot < N; ++pivot)
    {
        float *pivotRow = A[pivot];
        const float diag = pivotRow[pivot];

        // Scale the pivot row; the diagonal entry itself is included.
        for (int col = pivot; col < N; ++col)
        {
            pivotRow[col] = pivotRow[col] / diag;
        }

        // Eliminate the pivot column from every row below the pivot row.
        for (int row = pivot + 1; row < N; ++row)
        {
            float *target = A[row];
            const float factor = target[pivot];
            for (int col = pivot + 1; col < N; ++col)
            {
                target[col] = target[col] - factor * pivotRow[col];
            }
            target[pivot] = 0;
        }
    }
    return A;
}
// SSE (4 floats per instruction) Gaussian elimination, performed in place
// on the N x N matrix A. Mirrors serial(): for each pivot row k the row is
// divided by A[k][k], then every row i > k has A[i][k] times the scaled
// pivot row subtracted from it and A[i][k] is set to zero.
//
// Column ranges are split into two parts:
//   * a vector part, walked from the end of the row towards the front in
//     chunks of 4, so every chunk starts at a column congruent to N mod 4;
//   * a scalar "head" covering the columns between the first column of the
//     range and the first vector chunk.
// Together the two parts cover each column of the range exactly once, for
// any N, so no further pass over the leading N % 4 columns is needed.
//
// Unaligned loads/stores are used, so the rows need no particular alignment.
// No row exchange is performed and A[k][k] is not checked against zero.
//
// A: array of N row pointers, each addressing N floats; modified in place.
void parallel(float **A)
{
    const int chunkPhase = N % 4; // column index (mod 4) at which vector chunks start

    for (int k = 0; k < N; k++)
    {
        float *pivotRow = A[k];
        const float pivot = pivotRow[k];
        const __m128 pivotVec = _mm_set1_ps(pivot);

        // Vector part of the row scaling: chunks [j, j+4) with j >= k, back to front.
        for (int j = N - 4; j >= k; j -= 4)
        {
            const __m128 rowVec = _mm_loadu_ps(pivotRow + j);
            _mm_storeu_ps(pivotRow + j, _mm_div_ps(rowVec, pivotVec));
        }
        // Scalar head: columns from k up to the first vector chunk.
        // Uses the saved pivot because pivotRow[k] itself is overwritten here.
        for (int j = k; j % 4 != chunkPhase; j++)
        {
            pivotRow[j] = pivotRow[j] / pivot;
        }

        for (int i = k + 1; i < N; i++)
        {
            float *row = A[i];
            const float factor = row[k];
            const __m128 factorVec = _mm_set1_ps(factor);

            // Vector part of the elimination: chunks that start strictly after column k.
            for (int j = N - 4; j > k; j -= 4)
            {
                const __m128 rowVec = _mm_loadu_ps(row + j);
                const __m128 pivVec = _mm_loadu_ps(pivotRow + j);
                _mm_storeu_ps(row + j, _mm_sub_ps(rowVec, _mm_mul_ps(factorVec, pivVec)));
            }
            // Scalar head: columns from k + 1 up to the first vector chunk.
            for (int j = k + 1; j % 4 != chunkPhase; j++)
            {
                row[j] = row[j] - factor * pivotRow[j];
            }
            row[k] = 0;
        }
    }
}
// Prints the top-left num x num corner of matrix A to standard output.
// Each value is followed by a single space and each row ends with a newline.
//
// A:   array of row pointers; rows 0..num-1 must each hold at least num floats.
// num: number of rows and columns to print; nothing is printed when num <= 0.
void print(float **A, int num)
{
    for (int i = 0; i < num; i++)
    {
        for (int j = 0; j < num; j++)
        {
            std::cout << A[i][j] << " ";
        }
        // Plain newline: the stream is not flushed after every row.
        std::cout << '\n';
    }
}
// Program entry point: allocates the matrix, prints its top-left corner,
// then runs and times serial() followed by parallel(), printing the
// top-left corner and the elapsed clock() difference after each run.
//
// NOTE(review): this copy of the function is damaged (the loop header
// below is truncated and the tail of the function is missing), so the
// notes here describe only what is visible.
int _tmain() // main entry point
{
srand((unsigned)time(NULL)); // seed the random number generator
float **A = new float*[N];
float **A2 = new float*[N];
// NOTE(review): loop header is truncated in this copy; presumably it
// iterates i over 0..N-1 -- confirm against the original file.
for (int i = 0; i {
A[i] = new float[N];
// NOTE(review): A2's rows are the same buffers as A's rows, not copies,
// so parallel(A2) below works on the storage serial(A) already modified.
A2[i] = A[i];
}
cout << “*************生成初始随机矩阵************“ << endl;
// NOTE(review): the bound is 1, so only row 0 is assigned here; no other
// row is given values anywhere in the visible code.
for (int i = 0; i < 1; i++)
{
for (int j = 0; j < N; j++)
{
A[i][j] = rand() % 90; // rand() % 90 yields an integer in 0..89
}
}
print(A10);
cout << “***************串行的高斯**************“ << endl;
clock_t clockBegin clockEnd;
clockBegin = clock(); // start timing
// B receives the pointer returned by serial(); it is not used again in the visible code.
float **B = serial(A);
clockEnd = clock();
print(A10);
// NOTE(review): the value printed is a raw clock() difference labelled "ms";
// that is milliseconds only if CLOCKS_PER_SEC is 1000 -- confirm for the target.
cout << “总共耗时: “ << clockEnd - clockBegin << “ms“ << endl;
cout << “***************并行的高斯**************“ << endl;
clockBegin = clock(); // start timing
parallel(A2);
clockEnd = clock();
print(A210);
cout << “总共耗时: “ << clockEnd - clockBegin << “ms“ << endl;
return 0;
}

属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 156783 2016-07-19 14:34 高斯消去法SSE并行化.docx
文件 74752 2016-03-29 17:25 SSE_LU\Debug\SSE_LU.exe
文件 708952 2016-03-29 17:25 SSE_LU\Debug\SSE_LU.ilk
文件 1298432 2016-03-29 17:25 SSE_LU\Debug\SSE_LU.pdb
文件 3080192 2016-03-29 17:25 SSE_LU\ipch\sse_lu-5967a1ae\sse_lu-b3fb2d97.ipch
文件 1735 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.log
文件 165993 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.obj
文件 1703936 2016-03-29 15:11 SSE_LU\SSE_LU\Debug\SSE_LU.pch
文件 1374 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\cl.command.1.tlog
文件 30640 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\CL.read.1.tlog
文件 910 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\CL.write.1.tlog
文件 1208 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\li
文件 2738 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\li
文件 480 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\li
文件 188 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog\SSE_LU.lastbuildstate
文件 11920 2016-03-29 15:11 SSE_LU\SSE_LU\Debug\stdafx.obj
文件 764928 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\vc120.idb
文件 487424 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\vc120.pdb
文件 1503 2016-03-29 14:05 SSE_LU\SSE_LU\ReadMe.txt
文件 3110 2016-03-29 17:25 SSE_LU\SSE_LU\SSE_LU.cpp
文件 4531 2016-03-29 14:05 SSE_LU\SSE_LU\SSE_LU.vcxproj
文件 1313 2016-03-29 14:05 SSE_LU\SSE_LU\SSE_LU.vcxproj.filters
文件 212 2016-03-29 14:05 SSE_LU\SSE_LU\stdafx.cpp
文件 234 2016-03-29 14:05 SSE_LU\SSE_LU\stdafx.h
文件 236 2016-03-29 14:05 SSE_LU\SSE_LU\targetver.h
文件 33619968 2016-03-29 17:34 SSE_LU\SSE_LU.sdf
文件 964 2016-03-29 14:05 SSE_LU\SSE_LU.sln
..A..H. 13824 2016-03-29 17:34 SSE_LU\SSE_LU.v12.suo
目录 0 2016-03-29 17:25 SSE_LU\SSE_LU\Debug\SSE_LU.tlog
目录 0 2016-03-29 14:05 SSE_LU\ipch\sse_lu-5967a1ae
............此处省略8个文件信息
相关资源
- Tesseract-ocr快速训练语言库批处理文件
- WindowsServer2003.WindowsXP-KB926139-v2-x64-EN
- Genome doubling and chromosome elimination wit
- MD5暴力破解程序 用SSE2 CPU加速指令集
- Probability Essentials
- AssetStudio.x64.v0.12.65.zip 提取游戏资源工
- IDCardOCR_China 基于tesseract,实现摄像头
- 三款主题VS2010
- Simple Assembly Explorer v1.14.4.NET Class Edi
- PSSE程序操作手册(中文版)很强大)
- A triphenylamine-based four-armed molecule
- WindowsServer2003-KB914961-SP2-x86-CHSIE6升级
- 四款vssettings配置文件
- VS皮肤 vssettings
- tesseract-ocr-3.01
- Tesseract(DLL)
- Evaluation Quality of the Elderly's Living E
- 12 More Essential Skills for Software Architec
- Terrain Assets.unitypackageUnity3d官方资源包
- UnityStudio(AssetStudio)X64.v0.10.0.69中文
- icm2017dpassengerthroughput.zip
- jTessBoxEditor.zip
- tessent MemoryBist UserGuide March 2019
- PSS/E 动态仿真完全手册
- Essential Grammar in Use 3rd ed [Raymond Murph
- 周克敏鲁棒控制 Essentials of Robust Con
- Assembly-CSharp.rar
- Introduction to 64 Bit Intel Assembly Language
- Odoo 11 Development Essentials(3rd) epub
- Mastering Assembly Programming 无水印pdf
川公网安备 51152502000135号
评论
共有 条评论