python类和标准库

Python类 🔗︎

定义类 🔗︎

class MyClass:
    """A simple example class"""
    i = 12345

    def f(self):
        return 'hello world'

引用类的属性和方法 🔗︎

MyClass.i和 MyClass.f MyClass._doc => “A simple example class”

定义init方法,设置默认属性 🔗︎

def __init__(self):
    self.data = []

传递给类的参数会传递给init:

>>> class Complex:
...    def __init__(self, realpart, imagpart):
...        self.r = realpart
...        self.i = imagpart
...
>>> x = Complex(3.0, -4.5)
>>> x.r, x.i
(3.0, -4.5)

类共享变量和实例变量 🔗︎

class Dog:

    kind = 'canine'        # 类共享变量

    def __init__(self, name):
        self.name = name    # 实例变量

>>> d = Dog('Fido')
>>> e = Dog('Buddy')
>>> d.kind                  # shared by all dogs
'canine'
>>> e.kind                  # shared by all dogs
'canine'
>>> d.name                  # unique to d
'Fido'
>>> e.name                  # unique to e
'Buddy'

注意引用变量避免被所有实例共享 🔗︎

class Dog:

    tricks = []            # mistaken use of a class variable

    def __init__(self, name):
        self.name = name

    def add_trick(self, trick):
        self.tricks.append(trick)

>>> d = Dog('Fido')
>>> e = Dog('Buddy')
>>> d.add_trick('roll over')
>>> e.add_trick('play dead')
>>> d.tricks                # 这里tricks被所有实例共享了
['roll over', 'play dead']

正确的用法:

class Dog:

    def __init__(self, name):
        self.name = name
        self.tricks = []    # creates a new empty list for each dog

    def add_trick(self, trick):
        self.tricks.append(trick)

>>> d = Dog('Fido')
>>> e = Dog('Buddy')
>>> d.add_trick('roll over')
>>> e.add_trick('play dead')
>>> d.tricks
['roll over']
>>> e.tricks
['play dead']

函数的定义可以在类外部(不推荐) 🔗︎

# Function defined outside the class
def f1(self, x, y):
    return min(x, x+y)

class C:
    f = f1

    def g(self):
        return 'hello world'

    h = g

通过self引用函数 🔗︎

class Bag:
    def __init__(self):
        self.data = []

    def add(self, x):
        self.data.append(x)

    def addtwice(self, x):
        self.add(x)
        self.add(x)

每一个值都是一个对象,它的对象存储在object.class

继承 🔗︎

语法:

class DerivedClassName(BaseClassName):
    <statement-1>
    .
    .
    .
    <statement-N>

或(moduleName是导入的模块)

class DerivedClassName(moduleName.BaseClassName):

如果引用的属性没有在当前类中找到,会找他的基类。继承的类可以重写基类的方法。 有两个内置的函数很有用: 判断实例的类型:isinstance(obj, int) 判断是否是继承自int类 ( 如果 obj.class 是int或者继承自int类 返回true) issubclass(bool, int): 判断bool是否是int的子类

多继承 🔗︎

class DerivedClassName(Base1, Base2, Base3):
    <statement-1>
    .
    .
    .
    <statement-N>

私有变量和类内引用 🔗︎

私有变量一般是以_下划线开头 内部调用方法__双下划线开头:

class Mapping:
    def __init__(self, iterable):
        self.items_list = []
        self.__update(iterable)

    def update(self, iterable):
        for item in iterable:
            self.items_list.append(item)

    __update = update  # 拷贝一份update方法

class MappingSubclass(Mapping):

    def update(self, keys, values): # 子类可以重写update方法 并且不会影响到init方法
        for item in zip(keys, values):
            self.items_list.append(item)

异常也是一个类 🔗︎

语法:

raise Class, instance
raise instance  (raise instance.__class__, instance的简写)

比如:

class B:
    pass
class C(B):
    pass
class D(C):
    pass

for c in [B, C, D]:
    try:
        raise c()
    except D:
        print "D"
    except C:
        print "C"
    except B:
        print "B"

遍历 🔗︎

大部分的对象都可以遍历

for element in [1, 2, 3]:
    print element
for element in (1, 2, 3):
    print element
for key in {'one':1, 'two':2}:
    print key
for char in "123":
    print char
for line in open("myfile.txt"):
    print line,

内部:for语句调用了iter()方法,然后调用next()方法访问下一个元素,如果没有下一个会抛出StopInteration异常

>>> s = 'abc'
>>> it = iter(s)
>>> it
<iterator object at 0x00A1DB50>
>>> it.next()
'a'
>>> it.next()
'b'
>>> it.next()
'c'
>>> it.next()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
    it.next()
StopIteration

给类增加遍历器:

class Reverse:
    """Iterator for looping over a sequence backwards."""
    def __init__(self, data):
        self.data = data
        self.index = len(data)

    def __iter__(self):
        return self

    def next(self):
        if self.index == 0:
            raise StopIteration
        self.index = self.index - 1
        return self.data[self.index]

使用:

>>> rev = Reverse('spam')
>>> iter(rev)
<__main__.Reverse object at 0x00A1DB50>
>>> for char in rev:
...    print char
...
m
a
p
s

标准库相关 🔗︎

系统接口 🔗︎

>>> import os
>>> os.getcwd()      # 返回当前目录
'C:\\Python26'
>>> os.chdir('/server/accesslogs')  # 改变工作目录
>>> os.system('mkdir today')  # 运行shell命令:mkdir
0
>>> import os
>>> dir(os)
<returns a list of all module functions>
>>> help(os)
<returns an extensive manual page created from the module's docstrings>

文件操作:

>>> import shutil
>>> shutil.copyfile('data.db', 'archive.db')
>>> shutil.move('/build/executables', 'installdir')

文件通配符 🔗︎

>>> import glob
>>> glob.glob('*.py')
['primes.py', 'random.py', 'quote.py']

命令行参数 🔗︎

比如我们跑了这个命令:python demo.py one two three demo.py里面的写法是:

>>> import sys
>>> print sys.argv
['demo.py', 'one', 'two', 'three']

getopt模块和argparse模块提供了更多灵活的方式访问命令行参数

退出程序和打印错误 🔗︎

sys.exit() 打印错误:

>>> sys.stderr.write('Warning, log file not found starting a new one\n')
Warning, log file not found starting a new one

字符串匹配 🔗︎

>>> import re
>>> re.findall(r'\bf[a-z]*', 'which foot or hand fell fastest')
['foot', 'fell', 'fastest']
>>> re.sub(r'(\b[a-z]+) \1', r'\1', 'cat in the the hat')
'cat in the hat'
>>> 'tea for too'.replace('too', 'two')
'tea for two'

数学 🔗︎

>>> import math
>>> math.cos(math.pi / 4.0)
0.70710678118654757
>>> math.log(1024, 2)
10.0
>>> import random
>>> random.choice(['apple', 'pear', 'banana'])
'apple'
>>> random.sample(xrange(100), 10)  # sampling without replacement
[30, 83, 16, 4, 8, 81, 41, 50, 18, 33]
>>> random.random()    # random float
0.17970987693706186
>>> random.randrange(6)    # random integer chosen from range(6)
4

网络访问 🔗︎

>>> import urllib2
>>> for line in urllib2.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl'):
...    if 'EST' in line or 'EDT' in line:  # look for Eastern Time
...        print line

<BR>Nov. 25, 09:43:32 PM EST

>>> import smtplib
>>> server = smtplib.SMTP('localhost')
>>> server.sendmail('[email protected]', '[email protected]',
... """To: [email protected]
... From: [email protected]
...
... Beware the Ides of March.
... """)
>>> server.quit()

日期 🔗︎

>>> # dates are easily constructed and formatted
>>> from datetime import date
>>> now = date.today()
>>> now
datetime.date(2003, 12, 2)
>>> now.strftime("%m-%d-%y. %d %b %Y is a %A on the %d day of %B.")
'12-02-03. 02 Dec 2003 is a Tuesday on the 02 day of December.'

>>> # dates support calendar arithmetic
>>> birthday = date(1964, 7, 31)
>>> age = now - birthday
>>> age.days
14368

数据压缩 🔗︎

>>> import zlib
>>> s = 'witch which has which witches wrist watch'
>>> len(s)
41
>>> t = zlib.compress(s)
>>> len(t)
37
>>> zlib.decompress(t)
'witch which has which witches wrist watch'
>>> zlib.crc32(s)
226805979

性能测试 🔗︎

>>> from timeit import Timer
>>> Timer('t=a; a=b; b=t', 'a=1; b=2').timeit()
0.57535828626024577
>>> Timer('a,b = b,a', 'a=1; b=2').timeit()
0.54962537085770791

质量控制 🔗︎

运行文档内的测试代码:

def average(values):
    """Computes the arithmetic mean of a list of numbers.

    >>> print average([20, 30, 70])
    40.0
    """
    return sum(values, 0.0) / len(values)

import doctest
doctest.testmod()  # automatically validate the embedded tests

运行测试集:

import unittest

class TestStatisticalFunctions(unittest.TestCase):

    def test_average(self):
        self.assertEqual(average([20, 30, 70]), 40.0)
        self.assertEqual(round(average([1, 5, 7]), 1), 4.3)
        with self.assertRaises(ZeroDivisionError):
            average([])
        with self.assertRaises(TypeError):
            average(20, 30, 70)

unittest.main()  # Calling from the command line invokes all tests

格式化输出 🔗︎

>>> import repr
>>> repr.repr(set('supercalifragilisticexpialidocious'))
"set(['a', 'c', 'd', 'e', 'f', 'g', ...])"

自动换行和格式化:

>>> import pprint
>>> t = [[[['black', 'cyan'], 'white', ['green', 'red']], [['magenta',
...    'yellow'], 'blue']]]
...
>>> pprint.pprint(t, width=30)
[[[['black', 'cyan'],
  'white',
  ['green', 'red']],
  [['magenta', 'yellow'],
  'blue']]]

限制宽度输出:

>>> import textwrap
>>> doc = """The wrap() method is just like fill() except that it returns
... a list of strings instead of one big string with newlines to separate
... the wrapped lines."""
...
>>> print textwrap.fill(doc, width=40)
The wrap() method is just like fill()
except that it returns a list of strings
instead of one big string with newlines
to separate the wrapped lines.

本地化输出:

>>> import locale
>>> locale.setlocale(locale.LC_ALL, 'English_United States.1252')
'English_United States.1252'
>>> conv = locale.localeconv()          # get a mapping of conventions
>>> x = 1234567.8
>>> locale.format("%d", x, grouping=True)
'1,234,567'
>>> locale.format_string("%s%.*f", (conv['currency_symbol'],
...                      conv['frac_digits'], x), grouping=True)
'$1,234,567.80'

模板 🔗︎

>>> from string import Template
>>> t = Template('${village}folk send $$10 to $cause.')
>>> t.substitute(village='Nottingham', cause='the ditch fund')
'Nottinghamfolk send $10 to the ditch fund.'

substitute会替换模板的关键字。如果传递的参数不对会报异常,建议用safe_substitute:

>>> t = Template('Return the $item to $owner.')
>>> d = dict(item='unladen swallow')
>>> t.substitute(d)
Traceback (most recent call last):
  ...
KeyError: 'owner'
>>> t.safe_substitute(d)
'Return the unladen swallow to $owner.'

自定义分隔符号:

>>> import time, os.path
>>> photofiles = ['img_1074.jpg', 'img_1076.jpg', 'img_1077.jpg']
>>> class BatchRename(Template):
...    delimiter = '%'
>>> fmt = raw_input('Enter rename style (%d-date %n-seqnum %f-format):  ')
Enter rename style (%d-date %n-seqnum %f-format):  Ashley_%n%f

>>> t = BatchRename(fmt)
>>> date = time.strftime('%d%b%y')
>>> for i, filename in enumerate(photofiles):
...    base, ext = os.path.splitext(filename)
...    newname = t.substitute(d=date, n=i, f=ext)
...    print '{0} --> {1}'.format(filename, newname)

img_1074.jpg --> Ashley_0.jpg
img_1076.jpg --> Ashley_1.jpg
img_1077.jpg --> Ashley_2.jpg

多线程 🔗︎

import threading, zipfile

class AsyncZip(threading.Thread):
    def __init__(self, infile, outfile):
        threading.Thread.__init__(self)
        self.infile = infile
        self.outfile = outfile

    def run(self):
        f = zipfile.ZipFile(self.outfile, 'w', zipfile.ZIP_DEFLATED)
        f.write(self.infile)
        f.close()
        print 'Finished background zip of: ', self.infile

background = AsyncZip('mydata.txt', 'myarchive.zip')
background.start()
print 'The main program continues to run in foreground.'

background.join()    # Wait for the background task to finish
print 'Main program waited until background was done.'

建议用单线程,然后Queue模块实现多线程的操作,更加容易试错和设计。

日志 🔗︎

import logging
logging.debug('Debugging information')
logging.info('Informational message')
logging.warning('Warning:config file %s not found', 'server.conf')
logging.error('Error occurred')
logging.critical('Critical error -- shutting down')

弱引用 🔗︎

>>> import weakref, gc
>>> class A:
...    def __init__(self, value):
...        self.value = value
...    def __repr__(self):
...        return str(self.value)
...
>>> a = A(10)                  # 创建一个引用
>>> d = weakref.WeakValueDictionary()
>>> d['primary'] = a            # 不会创建引用
>>> d['primary']                # 
10
>>> del a                      # 删除
>>> gc.collect()                # 运行垃圾回收
0
>>> d['primary']                # 这个时候访问会报错
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
    d['primary']                
  File "C:/python26/lib/weakref.py", line 46, in __getitem__
    o = self.data[key]()
KeyError: 'primary'

Lists相关工具集 🔗︎

队列:

>>> from collections import deque
>>> d = deque(["task1", "task2", "task3"])
>>> d.append("task4")
>>> print "Handling", d.popleft()
Handling task1

操作排序:已经排序的插入一个元素

>>> import bisect
>>> scores = [(100, 'perl'), (200, 'tcl'), (400, 'lua'), (500, 'python')]
>>> bisect.insort(scores, (300, 'ruby'))
>>> scores
[(100, 'perl'), (200, 'tcl'), (300, 'ruby'), (400, 'lua'), (500, 'python')]

精确的浮点操作: 🔗︎

>>> from decimal import *
>>> x = Decimal('0.70') * Decimal('1.05')
>>> x
Decimal('0.7350')
>>> x.quantize(Decimal('0.01'))  # round to nearest cent
Decimal('0.74')
>>> round(.70 * 1.05, 2)        # same calculation with floats
0.73