#!/bin/bash
# -*- mode: python; -*-
import os
import re
import requests
url = "https://system76.com/laptops/kudu"
#
# Load the text from request as a true unicode string:
#
r = requests.get(url)
r.encoding = "UTF-8"
data = r.text # ok, data is a true unicode string
# translate offending characters in unicode:
charmap = {
0x2014: u'-', # em dash
0x201D: u'"', # comma quotation mark, double
# etc.
}
data = data.translate(charmap)
tdata = data.encode('ascii')
我得到的错误是:
./simple_wget
Traceback (most recent call last):
File "./simple_wget.py", line 25, in <module>
tdata = data.encode('ascii')
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 10166: ordinal not in range(128)