Python Cookbook——数据结构和算法

第一章 数据结构和算法

1.1 将序列分解为单独的变量

# Any N-element sequence can be unpacked into N variables in one assignment.
p = (4, 5)
x, y = p
print(x)
print(y)

data = ['acme', 50, 91.1, (2012, 12, 21)]
name, shares, price, date = data
print(name)
print(shares)
print(price)
print(date)

# A nested sequence can be unpacked in the same statement.
name, shares, price, (year, mon, day) = data
print(year)

p = (4, 5)
# x, y, z = p  # ValueError: the number of targets must match the length

# Unpacking works with any iterable, including strings.
s = 'hello!'
a, b, c, d, e, f = s
print(a)
print(f)

# Use a throwaway name such as ``_`` for values that are not needed.
data = ['acme', 50, 91.1, (2012, 12, 21)]
_, shares, price, _ = data
print(shares)
print(price)
# The other values have been discarded.

1.2 从任意长度的可迭代对象中分解元素

from audioop import avg
def drop_first_last(grades):
    """Return the mean of ``grades`` after dropping the first and last entry.

    Raises:
        ValueError: if fewer than three grades are given, because nothing
            is left to average (fewer than two already fail in the
            unpacking itself).
    """
    # ``audioop.avg`` operates on raw audio fragments and requires a sample
    # width, so it cannot average a list of numbers; use statistics.mean.
    from statistics import mean

    _first, *middle, _last = grades
    if not middle:
        raise ValueError('need at least three grades to drop the first and last')
    return mean(middle)
# A starred target collects "the rest" of the items into a list.
record = ('dave', 'dave@example.com', '777-333-2323', '234-234-2345')
name, email, *phone_numbers = record
print(name)
print(email)
print(phone_numbers)

# The starred target may also come first.
*trailing, current = [10, 8, 7, 2, 5]
print(trailing)  # [10, 8, 7, 2]
print(current)   # 5

# Tagged tuples of varying length.
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 5, 3),
]
def do_foo(x, y):
    """Print the ``foo`` tag followed by ``x`` and ``y``."""
    print('foo', x, y)
def do_bar(s):
    """Print the ``bar`` tag followed by ``s``."""
    print('bar', s)
# Dispatch each tagged record to its handler; ``*args`` absorbs the
# variable-length payload that follows the tag.
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)
# Star unpacking combined with string splitting.
line = 'asdf:fedfr234://wef:678d:asdf'
uname, *fields, homedir, sh = line.split(':')
print(uname)
print(homedir)

# Starred throwaway names discard several values at once.
record = ('acme', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
print(name)
print(year)

# Split a list into its head and its tail.
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
print(head)  # 1
print(tail)  # [10, 7, 4, 5, 9]
def sum(items):
    """Add up the elements of ``items``, folding from the right.

    The result equals ``items[0] + (items[1] + (... + items[-1]))``.
    An empty iterable yields ``0``.

    The name shadows the builtin ``sum``; it is kept so existing callers
    keep working.
    """
    values = list(items)
    if not values:
        return 0
    # Iterate instead of recursing: no recursion-depth limit and no
    # repeated copying of the tail, while keeping the right-to-left order.
    total = values.pop()
    for value in reversed(values):
        total = value + total
    return total
# Show the result instead of silently discarding it.
print(sum(items))  # 36

1.3 保存最后n个元素

from _collections import deque
def search(lines, pattern, history=5):
    """Yield ``(line, previous_lines)`` for each line containing ``pattern``.

    ``previous_lines`` is a deque holding at most ``history`` of the lines
    seen before the match.  The same deque object is yielded every time and
    keeps changing as iteration continues, so copy it if it must be kept.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        # Record the line only after the check so that a matching line never
        # appears in its own history.
        previous_lines.append(line)
# Example use on a file.
if __name__ == '__main__':
    with open('somefile.txt') as f:
        for line, prevlines in search(f, 'python', 5):
            # Lines read from a file keep their trailing newline, so
            # suppress the extra one print() would add.
            for pline in prevlines:
                print(pline, end='')
            print(line, end='')
            print('-' * 20)
# A bounded deque drops the oldest item once it is full.
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print(q)  # deque([1, 2, 3], maxlen=3)
q.append(4)
print(q)  # deque([2, 3, 4], maxlen=3)

# An unbounded deque supports appends and pops at both ends.
q = deque()
q.append(1)
q.append(2)
q.append(3)
print(q)  # deque([1, 2, 3])
q.appendleft(4)
print(q)  # deque([4, 1, 2, 3])
q_pop = q.pop()
print(q_pop)  # 3
print(q)      # deque([4, 1, 2])
q_popleft = q.popleft()
print(q_popleft)  # 4
print(q)          # deque([1, 2])

1.4 找到最大或最小的n个元素

import heapq
# nlargest()/nsmallest() return the N biggest/smallest items of a collection.
nums = [1, 30, 6, 2, 36, 33, 46, 3, 23, 43]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

# A key function lets them rank more complex records.
portfolio = [
    {'name': 'ibm', 'shares': 100, 'price': 2.4},
    {'name': 'a', 'shares': 1040, 'price': 12.4},
    {'name': 's', 'shares': 40, 'price': 23.4},
    {'name': 'd', 'shares': 1, 'price': 2.49},
    {'name': 'f', 'shares': 9, 'price': 24},
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
print(cheap)
print(expensive)

# heapify() reorders a list in place into a heap; heap[0] is always the
# smallest item and heappop() removes it.
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
heap = list(nums)
print(heap)
heapq.heapify(heap)
print(heap)
print(heapq.heappop(heap))  # -4
print(heapq.heappop(heap))  # 1
print(heapq.heappop(heap))  # 2

1.5 实现优先级队列

import heapq
class priorityqueue:
    """Priority queue that pops the highest-priority item first.

    Items pushed with equal priority are popped in insertion order.
    """

    def __init__(self):
        self._queue = []
        self._index = 0

    def push(self, item, priority):
        """Add ``item`` with the given ``priority`` (larger pops sooner)."""
        # heapq is a min-heap, so the priority is negated; the running index
        # breaks ties in insertion order and keeps the heap from ever having
        # to compare two items directly.
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item.

        Raises:
            IndexError: if the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]
#example
class item:
    """Minimal named object that defines no ordering of its own."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'item({!r})'.format(self.name)
q = priorityqueue()
q.push(item('foo'), 1)
q.push(item('spam'), 4)
q.push(item('bar'), 5)
q.push(item('grok'), 1)
print(q.pop())  # item('bar')
print(q.pop())  # item('spam')
print(q.pop())  # item('foo')

# item instances cannot be ordered on their own.
a = item('foo')
b = item('bar')
# a < b  # TypeError: '<' is not supported between two item instances

# (priority, item) tuples compare fine while the priorities differ ...
a = (1, item('foo'))
b = (5, item('bar'))
print(a < b)  # True
c = (1, item('grok'))
# a < c  # TypeError: equal priorities fall through to comparing the items

# ... and a unique index in the middle settles every tie before the items
# themselves would be compared.
a = (1, 0, item('foo'))
b = (5, 1, item('bar'))
c = (1, 2, item('grok'))
print(a < b)  # True
print(a < c)  # True

1.6 在字典中将键映射到多个值上

# A multi-valued mapping stores a container per key: a list keeps insertion
# order and duplicates, a set drops duplicates.
d = {
    'a': [1, 2, 3],
    'b': [4, 5],
}
e = {
    'a': {1, 2, 3},
    'b': {4, 5},
}

from collections import defaultdict

# defaultdict creates the empty container the first time a key is accessed.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['a'].append(3)
print(d)

d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['a'].add(3)
print(d)

# With a plain dict, setdefault() gives the same effect.
d = {}
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2)
d.setdefault('b', []).append(3)
print(d)
# Sample (key, value) pairs to group.
pairs = [('a', 1), ('a', 2), ('b', 3)]

# Grouping by hand needs an explicit first-insertion check ...
d = {}
for key, value in pairs:
    if key not in d:
        d[key] = []
    d[key].append(value)

# ... which defaultdict makes unnecessary.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)

1.7 让字典保持有序

from collections import OrderedDict

# An OrderedDict remembers the order in which keys were inserted.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grol'] = 4
for key in d:
    print(key, d[key])

import json

# Serialising keeps the same field order.
print(json.dumps(d))

1.8 与字典有关的计算问题

price = {
    'acme': 23.45,
    'ibm': 25.45,
    'fb': 13.45,
    'io': 4.45,
    'java': 45.45,
    'av': 38.38,
}

# zip() inverts the dict into (value, key) pairs, so min/max/sorted rank by
# value and still report the matching key.
min_price = min(zip(price.values(), price.keys()))
print(min_price)
max_price = max(zip(price.values(), price.keys()))
print(max_price)
price_sorted = sorted(zip(price.values(), price.keys()))
print(price_sorted)

price_and_names = zip(price.values(), price.keys())
print(min(price_and_names))
# print(max(price_and_names))  # ValueError: zip() creates an iterator whose
#                              # contents can be consumed only once

# Reductions applied to the dict itself only look at the keys.
print(min(price))
print(max(price))

# Reducing the values loses the matching key.
print(min(price.values()))
print(max(price.values()))

# A key function returns the key that owns the smallest/largest value ...
print(min(price, key=lambda k: price[k]))
print(max(price, key=lambda k: price[k]))
# ... but getting the value then takes a second lookup.
min_value = price[min(price, key=lambda k: price[k])]
print(min_value)

# With (value, key) pairs, equal values are ordered by their key.
price = {
    'aaa': 23,
    'zzz': 23,
}
print(min(zip(price.values(), price.keys())))  # (23, 'aaa')
print(max(zip(price.values(), price.keys())))  # (23, 'zzz')

1.9 在两个字典中寻找相同点

a = {
    'x': 1,
    'y': 2,
    'z': 3,
}
b = {
    'x': 11,
    'y': 2,
    'w': 10,
}

# Key views and item views support set operations directly.
print(a.keys() & b.keys())    # {'x', 'y'}
print(a.keys() - b.keys())    # {'z'}
print(a.items() & b.items())  # {('y', 2)}

# Build a new dict with selected keys removed.
c = {key: a[key] for key in a.keys() - {'z', 'w'}}
print(c)  # {'x': 1, 'y': 2} (key order may vary)

1.10 从序列中移除重复项且保持元素间顺序不变

def dedupe(items):
    """Yield the items of ``items`` in order, skipping repeats.

    Items are tracked in a set, so they must be hashable.
    """
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            seen.add(item)
# Example
a = [1, 5, 2, 1, 9, 1, 5, 10]
print(list(dedupe(a)))  # [1, 5, 2, 9, 10]
def dedupe2(items, key=None):
    """Yield the items of ``items`` in order, skipping repeats.

    Args:
        items: iterable to filter.
        key: optional function mapping an item to a hashable value used to
            detect duplicates; when omitted, the item itself is used and
            must therefore be hashable.
    """
    seen = set()
    for item in items:
        val = item if key is None else key(item)
        if val not in seen:
            yield item
            seen.add(val)
# Example: dicts are unhashable, so a key function supplies the value used
# to detect duplicates.
a = [
    {'x': 1, 'y': 2},
    {'x': 1, 'y': 3},
    {'x': 1, 'y': 2},
    {'x': 2, 'y': 4},
]
print(list(dedupe2(a, key=lambda d: (d['x'], d['y']))))
print(list(dedupe2(a, key=lambda d: d['x'])))

# set() also removes duplicates, but does not preserve the original order.
a = [1, 5, 2, 1, 9, 1, 5, 10]
print(set(a))

1.11 对切片命名

items = [0, 1, 2, 3, 4, 5, 6]

# A slice object can be used anywhere a literal slice is allowed.
a = slice(2, 4)
print(items[2:4])  # [2, 3]
print(items[a])    # [2, 3]
items[a] = [10, 11]
print(items)       # [0, 1, 10, 11, 4, 5, 6]

# Its bounds are available as attributes.
print(a.start)  # 2
print(a.stop)   # 4
print(a.step)   # None

1.12 找出序列中出现次数最多的元素

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'look',
]

from collections import Counter

word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)
print(word_counts['look'])
print(word_counts['the'])

# Counts can be incremented by hand ...
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in morewords:
    word_counts[word] += 1
print(word_counts['eyes'])
print(word_counts['why'])

# ... or in bulk with update().
word_counts.update(morewords)
print(word_counts['eyes'])
print(word_counts['why'])

# Counters can be combined with arithmetic operators.
a = Counter(words)
b = Counter(morewords)
print(a)
print(b)
c = a + b
print(c)
d = a - b
print(d)

1.13 通过公共键对字典列表排序

rows = [
    {'fname': 'brian', 'lname': 'jones', 'uid': 1003},
    {'fname': 'david', 'lname': 'beazley', 'uid': 1002},
    {'fname': 'john', 'lname': 'cleese', 'uid': 1001},
    {'fname': 'big', 'lname': 'jones', 'uid': 1004},
]

from operator import itemgetter

rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)

# itemgetter() accepts several keys and then returns a tuple of values.
rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname'))
print(rows_by_lfname)

# The same sorts written with lambdas; the tuple order must match the
# itemgetter() call above (last name first, then first name).
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'], r['fname']))
print(rows_by_fname)
print(rows_by_lfname)

# The same key functions work with min() and max().
print(min(rows, key=itemgetter('uid')))
print(max(rows, key=itemgetter('uid')))

1.14 对不原生支持比较操作的对象排序

class user:
    """Simple record holding a ``user_id``; defines no ordering of its own."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return 'user({})'.format(self.user_id)
users = [user(23), user(3), user(99)]
print(users)

# Objects without comparison support can be sorted through a key function.
print(sorted(users, key=lambda u: u.user_id))

from operator import attrgetter

# attrgetter() builds the same key function as the lambda above.
print(sorted(users, key=attrgetter('user_id')))
print(min(users, key=attrgetter('user_id')))
print(max(users, key=attrgetter('user_id')))

1.15 根据字段将记录分组

rows = [
    {'address': '5412 n clark', 'data': '07/01/2012'},
    {'address': '5232 n clark', 'data': '07/04/2012'},
    {'address': '5542 e 58ark', 'data': '07/02/2012'},
    {'address': '5152 n clark', 'data': '07/03/2012'},
    {'address': '7412 n clark', 'data': '07/02/2012'},
    {'address': '6789 w clark', 'data': '07/03/2012'},
    {'address': '9008 n clark', 'data': '07/01/2012'},
    {'address': '2227 w clark', 'data': '07/04/2012'},
]

from operator import itemgetter
from itertools import groupby

# groupby() only groups consecutive items, so sort by the same key first.
rows.sort(key=itemgetter('data'))
for data, items in groupby(rows, key=itemgetter('data')):
    print(data)
    for i in items:
        print(' ', i)

from collections import defaultdict

# Alternative: a multi-valued dict keyed by the field gives direct access to
# each group.
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['data']].append(row)
for r in rows_by_date['07/04/2012']:
    print(r)

1.16 筛选序列中的元素

mylist = [1, 4, -5, 10, -7, 2, 3, -1]

# List comprehensions build the filtered list immediately.
print([n for n in mylist if n > 0])
print([n for n in mylist if n < 0])

# A generator expression produces the filtered values lazily.
pos = (n for n in mylist if n > 0)
print(pos)
for x in pos:
    print(x)

values = ['1', '2', '-3', '-', '4', 'n/a', '5']
def is_int(val):
    """Return True if ``int(val)`` succeeds, False if it raises ValueError."""
    try:
        int(val)
    except ValueError:
        return False
    return True
# Keep only the entries that is_int() accepts.
ivals = [v for v in values if is_int(v)]
print(ivals)
mylist = [1, 4, -5, 10, -7, 2, 3, -1]

import math

# A comprehension can transform the values while filtering them.
print([math.sqrt(n) for n in mylist if n > 0])

# Replace rejected values instead of dropping them.
clip_neg = [n if n > 0 else 0 for n in mylist]
print(clip_neg)
clip_pos = [n if n < 0 else 0 for n in mylist]
print(clip_pos)

addresses = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
counts = [0, 3, 10, 4, 1, 7, 6, 1]

from itertools import compress

# compress() keeps the items whose matching selector is true, which lets one
# sequence be filtered by a condition on another.
more5 = [n > 5 for n in counts]
print(more5)
print(list(compress(addresses, more5)))

1.17 从字典中提取子集

prices = {'acne': 45.23, 'aapl': 612.78, 'ibm': 205.55, 'hpq': 37.20, 'fb': 10.75}

# Dict comprehension filtered by value.
p1 = {key: value for key, value in prices.items() if value > 200}
print(p1)

# Dict comprehension filtered by key.
tech_names = {'aapl', 'ibm', 'hpq'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
print(p2)

# Same result as p1 through dict() and a generator of pairs (slower).
p3 = dict((key, value) for key, value in prices.items() if value > 200)
print(p3)

# Same result as p2 with an extra lookup per key (slower).
tech_names = {'aapl', 'ibm', 'hpq'}
p4 = {key: prices[key] for key in prices.keys() if key in tech_names}
print(p4)

1.18 将名称映射到序列的元素中

from collections import namedtuple
# A namedtuple adds attribute access on top of ordinary tuple behaviour.
subscriber = namedtuple('subscriber', ['addr', 'joined'])
sub = subscriber('wang@qq.com', '2020-10-10')
print(sub)
print(sub.joined)
print(sub.addr)

# It still supports len() and unpacking like a regular tuple.
print(len(sub))
addr, joined = sub
print(addr)
print(joined)
def compute_cost(records):
    """Return the total of ``rec[1] * rec[2]`` over all ``records``.

    The positional indexes tie this function to the layout of each record.
    Returns ``0.0`` for an empty ``records``.
    """
    total = 0.0
    for rec in records:
        total += rec[1] * rec[2]
    return total
stock = namedtuple('stock', ['name', 'shares', 'price'])


def compute_cost2(records):
    """Return the total of ``shares * price`` over all ``records``.

    Each record is converted to a ``stock`` namedtuple first, so the fields
    are read by name; every record must therefore have exactly three items.
    Returns ``0.0`` for an empty ``records``.
    """
    total = 0.0
    for rec in records:
        s = stock(*rec)
        total += s.shares * s.price
    return total
s = stock('acme', 100, 123.45)
print(s)
# s.shares = 75  # AttributeError: namedtuple fields cannot be assigned
# _replace() returns a new tuple with the given fields changed.
s = s._replace(shares=75)
print(s)
stock = namedtuple('stock', ['name', 'shares', 'price', 'date', 'time'])
# Prototype instance holding the default value of every field.
stock_prototype = stock('', 0, 0.0, None, None)


def dict_to_stock(s):
    """Return a ``stock`` built from the mapping ``s``.

    Fields missing from ``s`` keep the values of ``stock_prototype``.
    """
    return stock_prototype._replace(**s)
a = {'name': 'acme', 'shares': 100, 'price': 123.45}
print(dict_to_stock(a))
b = {'name': 'acme', 'shares': 100, 'price': 123.45, 'date': '12/12/2012'}
print(dict_to_stock(b))

1.19 同时对数据做转换和换算

# A generator expression passed as the only argument needs no extra
# parentheses and avoids building a temporary list.
nums = [1, 2, 3, 4, 5]
s = sum(x * x for x in nums)
print(s)  # 55
import os

# Report whether the directory contains any .py file.
files = os.listdir('dirname')
if any(name.endswith('.py') for name in files):
    print('there be python!')
else:
    print('sorry, no python!')
# Convert and join the fields of a tuple in one step.
s = ('acme', 50, 123.45)
print(','.join(str(x) for x in s))  # acme,50,123.45

portfolio = [
    {'name': 'goog', 'shares': 50},
    {'name': 'yhoo', 'shares': 75},
    {'name': 'aol', 'shares': 20},
    {'name': 'scox', 'shares': 65},
]

# Reducing over a generator returns just the smallest value ...
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)  # 20

# ... while a key function returns the whole record that owns it.
min_shares = min(portfolio, key=lambda s: s['shares'])
print(min_shares)  # {'name': 'aol', 'shares': 20}
1.20 将多个映射合并为单个映射
Python代码
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}

from collections import ChainMap

# A ChainMap presents several mappings as one without copying them.
c = ChainMap(a, b)
print(c['x'])
print(c['y'])
print(c['z'])  # 3: for duplicate keys the first mapping (a) wins
print(len(c))
print(list(c.values()))

# Mutations always act on the first mapping in the chain.
c['z'] = 10
c['w'] = 40
del c['x']
print(a)
# del c['y']  # KeyError: 'y' only exists in the second mapping

# new_child() pushes a fresh mapping on the front; parents drops it again.
values = ChainMap()
values['x'] = 1
values = values.new_child()  # add a new map
values['x'] = 2
values = values.new_child()
values['x'] = 3
# print(values)
print(values['x'])  # 3
values = values.parents
print(values['x'])  # 2
values = values.parents
print(values['x'])  # 1

# Merging with dict.update() creates an independent copy ...
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}
merged = dict(b)
merged.update(a)
print(merged['x'])
print(merged['y'])
print(merged['z'])
a['x'] = 13
print(merged['x'])  # still 1: the change is not reflected in the merged dict

# ... whereas a ChainMap keeps referring to the original dicts.
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}
merged = ChainMap(a, b)
print(merged['x'])
a['x'] = 42
print(merged['x'])  # 42: the change is reflected in the ChainMap

发表评论 取消回复

发表评论取消回复