import atexit
import sqlite3
import traceback
# Shared connection and cursor for the local SQLite file that records which
# URLs have already been posted. Both are module-level globals used by the
# helper functions in this file.
con = sqlite3.connect('posted.db')
print('Opened database successfully')
cursor = con.cursor()

# Original author's note: when text is spliced directly into an SQL string
# literal, every single quote inside it has to be doubled ('').
cursor.execute('CREATE TABLE IF NOT EXISTS posturl(id INTEGER PRIMARY KEY,url TEXT,url_md5 TEXT)')
print('Table created successfully')

# "INSERT OR IGNORE" only skips a row when it would violate a uniqueness
# constraint, so url_md5 needs a unique index for duplicates to be ignored.
# The index also serves lookups by url_md5.
try:
    cursor.execute('CREATE UNIQUE INDEX IF NOT EXISTS idx_posturl_url_md5 ON posturl(url_md5)')
except sqlite3.IntegrityError:
    # A database file written before the index existed may already contain
    # duplicate url_md5 values; keep working with it instead of crashing.
    print('Warning: posturl already contains duplicate url_md5 values; unique index not created')
con.commit()
16 |
def get_data_from_db(url_md5):
    """Check whether a URL digest is already stored in the ``posturl`` table.

    Args:
        url_md5: Value compared against the ``url_md5`` column.

    Returns:
        The matching rows as returned by ``cursor.fetchall()`` (all columns
        of ``posturl``), or ``None`` when no row matches.
    """
    # Bind the value as a parameter rather than formatting it into the SQL
    # text: a value containing a quote can then neither break the statement
    # nor change its meaning.
    cursor.execute('select * from posturl where url_md5=?', (url_md5,))
    results = cursor.fetchall()
    return results or None
25 |
def posted_url_to_db(url, url_md5):
    """Record a collected URL in the local ``posturl`` table to avoid re-collecting it.

    The statement is ``INSERT OR IGNORE``: SQLite skips the row only when it
    would violate a uniqueness constraint, so de-duplication relies on such a
    constraint (for example a unique index on ``url_md5``) being present.

    Args:
        url: The URL that has been collected.
        url_md5: Digest string stored alongside the URL.

    Returns:
        None. The outcome is reported on stdout; on failure the transaction
        is rolled back and the traceback is printed instead of propagating.
    """
    # Values are bound as parameters so URLs containing quotes are stored
    # verbatim and cannot alter the statement.
    sql = "INSERT or IGNORE INTO posturl(url,url_md5) VALUES (?,?)"
    try:
        cursor.execute(sql, (url, url_md5))
        con.commit()
    except Exception:
        # Best-effort write: undo the partial transaction and report, but let
        # KeyboardInterrupt / SystemExit propagate.
        con.rollback()
        traceback.print_exc()
        print('url为{}写入数据库失败!'.format(url))
    else:
        print('url为{}写入数据库成功!md5为{}'.format(url, url_md5))
    print("*" * 150)
42 |
def _close_database():
    """Close the shared cursor, then the shared connection."""
    cursor.close()
    con.close()


# Closing at module top level would run as soon as the module is imported,
# leaving the helper functions above with a closed connection. Registering
# the cleanup with atexit keeps the connection usable for the whole process
# and still releases it when the interpreter exits.
atexit.register(_close_database)