Home > 采集商品发布到wordpress

采集商品发布到wordpress

0

这是今天自己折腾的一个项目:从数据采集、Google 翻译到 WordPress 发布,全部用 Python 打通了。

最终代码如下:

import requests
import json
from bs4 import BeautifulSoup
import xlwt
from wordpress_xmlrpc import Client, WordPressPost, WordPressTerm
from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo
from wordpress_xmlrpc.methods import taxonomies
import csv

def getHTMLText(url):
    """Fetch ``url`` with a GET request and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The response text on success, or ``0`` when the request fails
        (connection error, timeout, invalid URL, or a 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn HTTP error statuses into exceptions so they take the failure path.
        r.raise_for_status()
        return r.text
    except requests.RequestException:
        # Only request-level failures are expected here; a bare ``except``
        # would also swallow KeyboardInterrupt and genuine programming errors.
        print("Get HTML Text Failed!")
        return 0


def google_translate_EtoC(to_translate, from_language="en", to_language="ch-CN"):
    """Translate ``to_translate`` by scraping the Google Translate mobile page.

    Args:
        to_translate: Text to translate.
        from_language: Source language code placed in the ``sl`` parameter.
        to_language: Language code placed in the ``hl`` parameter.

    Returns:
        The text of the first ``<div class="t0">`` element of the result
        page, or ``""`` when the page cannot be fetched or holds no such
        element (``"Translation Failed!"`` is printed in both cases).
    """
    from urllib.parse import quote

    # NOTE(review): Google's code for Simplified Chinese is normally "zh-CN";
    # the "ch-CN" default is kept for backward compatibility -- confirm.
    # NOTE(review): the target language is sent as ``hl``; confirm whether
    # the endpoint expects it in ``tl`` instead.
    base_url = "https://translate.google.cn/m?hl={}&sl={}&ie=UTF-8&q={}"
    # Percent-encode the text so characters such as "&", "#" or "+" cannot
    # truncate or corrupt the query string.
    url = base_url.format(to_language, from_language, quote(str(to_translate)))

    # Fetch the result page; getHTMLText returns a falsy value on failure.
    html = getHTMLText(url)
    if not html:
        print("Translation Failed!")
        return ""

    # Parse the page and pull the translation out of the first "t0" div.
    soup = BeautifulSoup(html, "html.parser")
    matches = soup.find_all("div", {"class": "t0"})
    if not matches:
        print("Translation Failed!")
        return ""

    return matches[0].text

# Request headers sent with every product-detail request.
headers = {
    'Host': 'www.zalora.com.hk',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html, */*; q=0.01',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    'DNT': '1',
    'Referer': 'http://example.com/',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,ja;q=0.6',
}

# Spreadsheet objects are created here, but nothing below writes to the
# sheet or saves the workbook.
file = xlwt.Workbook()
table = file.add_sheet('info', cell_overwrite_ok=True)
x = 0

# XML-RPC client used to publish the posts.
wp = Client('http://www.xxx.cc/xmlrpc.php', 'user', 'pass')
base_url = "https://www.zalora.com.hk/data?method=getProductDetail&productUrl="

with open("urllist2.txt", "r") as f:
    # One product URL per line; iterate the file lazily instead of loading
    # every line up front.
    for line in f:
        # Strip the trailing newline so it does not end up in the request
        # URL, and skip blank lines entirely.
        product_url = line.strip()
        if not product_url:
            continue

        url = base_url + product_url
        try:
            res = requests.get(url, headers=headers, timeout=30)
            res.raise_for_status()
            data = json.loads(res.text)['data']
            # Only the fields that go into the post are read; indexing
            # attributes/images that are never used would abort the run
            # for products that lack them.
            name = data['product_name']
            catogery = data['bread_crumb'][2]['value']
            price = data['price'].replace("HK$", "")
            productDesc = data['short_description']
        except (requests.RequestException, ValueError, KeyError,
                IndexError, TypeError, AttributeError) as err:
            # A single unreachable or malformed product should not stop
            # the rest of the batch.
            print("Skipping {}: {}".format(product_url, err))
            continue

        # Publish the post: translated name as the title; category, price
        # and translated description as the content.
        post = WordPressPost()
        post.title = google_translate_EtoC(name)
        post.content = catogery + price + google_translate_EtoC(productDesc)
        post.post_status = 'publish'
        post.terms_names = {
            'post_tag': ['test', 'firstpost'],
            'category': [' Titika', 'Titika'],
        }

        wp.call(NewPost(post))

暧昧帖

本文暂无标签

发表评论

*

*