mysql mongodb性能测试

对mysql和mongodb做一个简单的调研。

测试环境

数据库版本

mongodb: 3.0.5 无任何优化,单机测试
mysql: 5.6.26 无任何优化,单机测试

环境

CPU: 2.2 GHz Intel Core i7
内存: 16G DDR3
操作系统: OS X Yosemite 10.10.5
python: 3.5.0

数据库接口驱动

mongodb: PyMongo 3.0.3
mysql: PyMySQL 0.6.6


测试指标

  • 插入
  • 排序
  • 搜索

测试脚本

测试思路如下:

  • 清空数据表
  • 插入100次数据,每次10000条
  • 统计每次插入的时间
  • 每次插入后,取出排序后最大的10条记录,并计时
  • 每次插入后,随机搜索10000条,并计时,统计QPS

mysql测试脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pymysql
import time
from random import randint

EACH_INSERT = 10000
INSERT_COUNT = 1000


def md5(s):
import hashlib
m = hashlib.md5()
m.update(str(s).encode('utf-8'))
return m.hexdigest()

# init
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd=None, db='test')
cur = conn.cursor()
cur.execute("TRUNCATE test.test_speed")
try:
# 加入这行速度有质的飞跃,本来一晚上只能跑到75w量级,现在几分钟就跑到100w量级了
cur.execute("ALTER TABLE `test`.`test_speed` ADD INDEX `index_value` (`value` ASC)")
except:
pass
file = open('mysql_result_' + str(INSERT_COUNT) + '.txt', 'w')

for i in range(INSERT_COUNT):
# insert
l = []
for j in range(EACH_INSERT):
l.append(md5(str(i) + '_' + str(j)))
pre = time.time()
for k in l:
sta = cur.execute("INSERT INTO `test_speed` (`value`) VALUES ('{0}')".format(k))
conn.commit()
after = time.time()
print("{0}: {1}".format(i, time.time() - pre))
insert_time = after - pre

# sort
pre = time.time()
cur.execute("SELECT * FROM test_speed ORDER BY value DESC limit 0, 10")
after = time.time()
print("sort time: {0}".format(time.time() - pre))
sort_time = after - pre

# search
l = []
num = EACH_INSERT
for k in range(num):
s = str(randint(0, i)) + '_' + str(randint(0, EACH_INSERT - 1))
l.append(md5(s))
pre = time.time()
for k in l:
cur.execute("SELECT * FROM test_speed WHERE value = '{0}'".format(k))
for v in cur:
value = v[1]
after = time.time()
print("search time({0}): {1}".format(num, time.time() - pre))
search_time = after - pre

# write result
file.write("{0} {1} {2} {3}\n".format((i + 1) * EACH_INSERT, insert_time, sort_time, search_time))


cur.close()
conn.close()

if __name__ == '__main__':
pass

mongodb测试脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pymongo
import time
from random import randint

EACH_INSERT = 10000
INSERT_COUNT = 1000


def md5(s):
import hashlib
m = hashlib.md5()
m.update(str(s).encode('utf-8'))
return m.hexdigest()

# init
client = pymongo.MongoClient('localhost', 27017)
db = client.test
collection = db['test_speed']
collection.drop()
collection.create_index([('value', pymongo.DESCENDING)])
file = open('mongo_result_' + str(INSERT_COUNT) + '.txt', 'a')

for i in range(INSERT_COUNT):
# insert
l = []
for j in range(EACH_INSERT):
l.append(md5(str(i) + '_' + str(j)))
pre = time.time()
for k in l:
collection.insert({'value': k})
after = time.time()
print("insert time {0}: {1}".format(i, after - pre))
insert_time = after - pre

# sort
pre = time.time()
res = collection.find().sort('value', pymongo.DESCENDING).limit(10)
after = time.time()
print("sort time: {0}".format(after - pre))
sort_time = after - pre

# search
l = []
num = EACH_INSERT
for k in range(num):
s = str(randint(0, i)) + '_' + str(randint(0, EACH_INSERT - 1))
l.append(md5(s))
pre = time.time()
for k in l:
res = collection.find({'value': k})
for v in res:
value = v['value']
after = time.time()
print("search time({0}): {1}".format(num, after - pre))
search_time = after - pre

# write result
file.write("{0} {1} {2} {3}\n".format((i + 1) * EACH_INSERT, insert_time, sort_time, search_time))

file.close()

if __name__ == '__main__':
pass

结果展示

作图使用的是matlab

matlab作图脚本

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
function plot_result(size)
mysql = load(['mysql_result_', num2str(size), '.txt']);
mongo = load(['mongo_result_', num2str(size), '.txt']);
data_size = mysql(:, 1);
mongo_insert_vec = mongo(:, 2);
mysql_insert_vec = mysql(:, 2);
mongo_sort_vec = mongo(:, 3);
mysql_sort_vec = mysql(:, 3);
mongo_search_vec = mongo(:, 4);
mysql_search_vec = mysql(:, 4);

% insert
plot(data_size, mysql_insert_vec);
grid on;
hold on;
plot(data_size, mongo_insert_vec);
title([num2str(size), 'w数据插入比较']);
legend('mysql', 'mongodb');
xlabel('data size');
ylabel('insert time per 10000 query');
saveas(gcf, ['insert_comp_', num2str(size), '.jpg'])
hold off;

% 排序
plot(data_size, mysql_sort_vec);
grid on;
hold on;
plot(data_size, mongo_sort_vec);
title('取最大的10个值');
legend('mysql', 'mongodb');
xlabel('data size');
ylabel('second');
saveas(gcf, ['sort_comp_', num2str(size), '.jpg'])
hold off;

% 搜索
for i = 1:size
mysql_search_vec(i) = (i * 10000 / 100) / mysql_search_vec(i);
mongo_search_vec(i) = (i * 10000 / 100) / mongo_search_vec(i);
end
plot(data_size, mysql_search_vec);
grid on;
hold on;
plot(data_size, mongo_search_vec);
title('QPS');
legend('mysql', 'mongodb');
xlabel('data size');
ylabel('query per second');
saveas(gcf, ['search_comp_', num2str(size), '.jpg'])
hold off;
end

插入比较

100w数据

500w数据

1000w数据

排序比较

100w数据

500w数据

1000w数据

搜索比较

100w数据

500w数据

1000w数据


参考资料

http://blog.csdn.net/clh604/article/details/19608869
http://www.cnblogs.com/inrie/archive/2011/02/22/1961415.html