Home >  > Sqlite记录采集的记录

Sqlite记录采集的记录

import sqlite3
import traceback
# Open (or create) the local SQLite file that records already-scraped URLs.
con = sqlite3.connect('posted.db')
print('Opened database successfully')
cursor = con.cursor()

# One row per scraped URL: an auto-assigned integer id, the URL and its MD5.
# Original author's note (translated): when a value is spliced into SQL text,
# a single quote inside it has to be written as two single quotes.
# NOTE(review): no UNIQUE constraint is declared on url or url_md5, so the
# "INSERT or IGNORE" used later in this file has no conflict to ignore and
# will store duplicates. Adding a unique index would fix that, but it fails
# on a database that already contains duplicate rows -- confirm before adding.
cursor.execute('CREATE TABLE IF NOT EXISTS posturl(id INTEGER PRIMARY KEY,url TEXT,url_md5 TEXT)')
print('Table created successfully')
con.commit()

def get_data_from_db(url_md5):
    """Check whether a URL hash has already been recorded in ``posturl``.

    Args:
        url_md5: Value compared against the ``url_md5`` column.

    Returns:
        A list of ``(id, url, url_md5)`` row tuples for every match, or
        ``None`` when no row matches.
    """
    # Bind the value as a parameter instead of formatting it into the SQL
    # text: a quote character in the value can then neither break the
    # statement nor inject SQL.
    cursor.execute('select * from posturl where url_md5=?', (url_md5,))
    results = cursor.fetchall()
    # An empty result list is reported as None, as callers expect.
    return results or None

def posted_url_to_db(url, url_md5):
    """Record a scraped URL in the local ``posturl`` table to avoid re-scraping.

    The insert is committed on success. On any failure the transaction is
    rolled back, the traceback is printed and the function returns normally,
    so one bad URL does not stop the caller.

    Args:
        url: The URL that has just been scraped.
        url_md5: The MD5 of ``url``, computed by the caller.

    Returns:
        None.
    """
    # Values are bound as parameters rather than formatted into the SQL text,
    # so URLs containing quotes are stored correctly and cannot inject SQL.
    # NOTE(review): "or IGNORE" only skips a row that violates a uniqueness
    # constraint; the CREATE TABLE statement in this file declares none on
    # url or url_md5, so duplicates are still inserted (the original comment
    # here also says an index has to be set up first).
    sql = "INSERT or IGNORE INTO posturl(url,url_md5) VALUES (?,?)"
    try:
        cursor.execute(sql, (url, url_md5))
        con.commit()
    except Exception:
        # Deliberately broad: recording a URL is best-effort. Unlike a bare
        # ``except:``, this still lets KeyboardInterrupt/SystemExit through.
        con.rollback()
        traceback.print_exc()
        print('url为{}写入数据库失败!'.format(url))
    else:
        print('url为{}写入数据库成功!md5为{}'.format(url,url_md5))
    print("*"*150)

# Release the cursor and the connection at the end of the script.
# NOTE(review): get_data_from_db / posted_url_to_db use these module-level
# objects, so they cannot be called after this point; the scraping code that
# calls them has to run before these two lines.
cursor.close()
con.close()

暧昧帖

本文暂无标签