这是今天自己折腾的一个项目,从数据采集、Google 翻译到 WordPress 发布,全部用 Python 打通了。
最终代码如下:
import csv
import json
from urllib.parse import quote

import requests
import xlwt
from bs4 import BeautifulSoup
from wordpress_xmlrpc import Client, WordPressPost, WordPressTerm
from wordpress_xmlrpc.methods import taxonomies
from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Returns the integer 0 (falsy) when the request fails or the server
    answers with an HTTP error status, after printing a short message.
    Callers are expected to test the result for truthiness.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate.
        print("Get HTML Text Failed!")
        return 0
    return r.text


def google_translate_EtoC(to_translate, from_language="en", to_language="ch-CN"):
    """Translate *to_translate* by scraping the Google Translate mobile page.

    Args:
        to_translate: Text to translate; it is URL-encoded before being sent.
        from_language: Source language code placed in the ``sl`` parameter.
        to_language: Language code placed in the ``hl`` parameter.

    Returns:
        The text of the first ``<div class="t0">`` element of the result
        page, or an empty string when the page cannot be fetched or the
        element is missing.
    """
    # NOTE(review): the default "ch-CN" is kept for backward compatibility;
    # the usual code for Simplified Chinese is "zh-CN" - confirm and adjust.
    # Build the request URL from the parameters.
    base_url = "https://translate.google.cn/m?hl={}&sl={}&ie=UTF-8&q={}"
    # Percent-encode the text so characters such as '&', '#' or '+' cannot
    # terminate or alter the q= parameter.
    url = base_url.format(to_language, from_language, quote(str(to_translate), safe=""))

    # Fetch the page; bail out early so we never reference an unset result.
    html = getHTMLText(url)
    if not html:
        return ""

    # Parse the page and pull out the translation.
    soup = BeautifulSoup(html, "html.parser")
    try:
        return soup.find_all("div", {"class": "t0"})[0].text
    except IndexError:
        # No matching element on the page.
        print("Translation Failed!")
        return ""


# Request headers sent with every product-detail request.
headers = {
    'Host': 'www.zalora.com.hk',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html, */*; q=0.01',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    'DNT': '1',
    'Referer': 'http://example.com/',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,ja;q=0.6',
}

# Spreadsheet objects are created but not written to anywhere in this script
# (the save call further down is disabled).
file = xlwt.Workbook()
table = file.add_sheet('info', cell_overwrite_ok=True)
x = 0

wp = Client('http://www.xxx.cc/xmlrpc.php', 'user', 'pass')
base_url = "https://www.zalora.com.hk/data?method=getProductDetail&productUrl="

with open("urllist2.txt", "r") as f:
    for line in f:
        # Drop the trailing newline (and surrounding whitespace) so it does
        # not end up inside the request URL; skip empty lines entirely.
        product_url = line.strip()
        if not product_url:
            continue

        url = base_url + product_url
        res = requests.get(url, headers=headers, timeout=30)
        json_data = json.loads(res.text)

        # Fields read from the product-detail payload. Only name, catogery,
        # price and productDesc are used below; the rest are extracted but
        # currently unused.
        sku = json_data['data']['sku_config']
        name = json_data['data']['product_name']
        catogery = json_data['data']['bread_crumb'][2]['value']
        price = json_data['data']['price'].replace("HK$", "")
        productDesc = json_data['data']['short_description']
        color = json_data['data']['attributes'][1]['value']
        Care_label = json_data['data']['attributes'][2]['value']
        model_body = json_data['data']['size_attributes'][0]['value']
        model_garment = json_data['data']['size_attributes'][1]['value']
        size = json_data['data']['size_attributes'][2]['value']
        image = json_data['data']['product_images'][3]['product_image']
        returnable = json_data['data']['return_info_text']
        # file.save('02.xls')

        # Publish the blog post.
        post = WordPressPost()
        post.title = google_translate_EtoC(name)
        post.content = catogery + price + google_translate_EtoC(productDesc)
        post.post_status = 'publish'
        post.terms_names = {
            'post_tag': ['test', 'firstpost'],
            'category': [' Titika', 'Titika'],
        }
        wp.call(NewPost(post))