import sqlite3 con=sqlite3.connect('posted.db') print('Opened database successfully') cursor = con.cursor() # cur.execute('''CREATE TABLE IF NOT EXISTS herbel #(ID INT PRIMARY KEY NOT NULL, #ch_name TEXT NOT NULL, # age INT NOT NULL, # address CHAR(50), # salary REAL # );''') cursor.execute('CREATE TABLE IF NOT EXISTS posturl(id INTEGER PRIMARY KEY,url TEXT,url_md5 TEXT)') #插入有逗号的内容,用两个单引号替换一个单引号 print('Table created successfully') con.commit() #connection.close() #查看数据库中是否已经存在url def get_data_from_db(url_md5): cursor.execute('''select * from posturl where url_md5='%s' ''' % (url_md5)) results = cursor.fetchall() if len(results) > 0: return results else: return None #将采集完成的url写入本地数据posted数据库中,防止重复采集。 def posted_url_to_db(url,url_md5): # url_md5 = get_md5(url) sql ="INSERT or IGNORE INTO posturl(url,url_md5) VALUES ('%s','%s')"%(url,url_md5) #最前面的引号要变成双引号,ignore表示忽略重复数据,不过先要设定索引 try: cursor.execute(sql) tag_id = cursor.lastrowid con.commit() print('url为{}写入数据库成功!md5为{}'.format(url,url_md5)) print("*"*150) except: con.rollback() traceback.print_exc() print('url为{}写入数据库失败!'.format(url)) tag_id = 0 print("*"*150) cursor.close() con.close()