Python programming:関数を作成する PARTⅡ

前回のPython programming関数作成PART1の続きをやる。今回は、Recursion, Iterators, Generators, Generators and comprehensionsなどをやる。

再帰関数は自分自身を呼び出す関数を指し、finite difference equation(有限差分方程式)の直接表記でもある。しかしながら、効率が悪いので、鈍足なPythonで使われるのは稀である。

# The factorial function is perhaps the simplest classic example of recursion.

def fact(n):
    """Returns the factorial of n."""
    # base case
    if n==0:
        return 1
    # recursive case
    else:
        return n * fact(n-1)

print ([fact(n) for n in range(10)])
[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]
# The Fibonacci sequence is another classic recursion example

def fib1(n):
    """Fib with recursion."""
    # base case
    if n==0 or n==1:
        return 1
    # recurssive caae
    else:
        return fib1(n-1) + fib1(n-2)

print ([fib1(i) for i in range(10)])
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
# In Python, a more efficient version that does not use recursion is

def fib2(n):
    """Fib without recursion."""
    a, b = 0, 1
    for i in range(1, n+1):
        a, b = b, a+b
    return b

print ([fib2(i) for i in range(10)])
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
# Note that the recursive version is much slower than the non-recursive version

%timeit fib1(20)
%timeit fib2(20)

# this is because it makes many duplicate function calls
# Note duplicate calls to fib(2) and fib(1) below
# fib(4) -> fib(3), fib(2)
# fib(3) -> fib(2), fib(1)
# fib(2) -> fib(1), fib(0)
# fib(1) -> 1
# fib(0) -> 1
3.1 ms ± 15.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.21 µs ± 4.82 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
# Use of cache to speed up the recursive version.
# Note biding of the (mutable) dictionary as a default at run-time.

def fib3(n, cache={0: 1, 1: 1}):
    """Fib with recursion and caching."""
    try:
        return cache[n]
    except KeyError:
        cache[n] = fib3(n-1) + fib3(n-2)
        return cache[n]

print ([fib3(i) for i in range(10)])

%timeit fib1(20)
%timeit fib2(20)
%timeit fib3(20)
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
3.15 ms ± 28.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.23 µs ± 9.35 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
121 ns ± 1.17 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
# Recursion is used to show off the divide-and-conquer paradigm

def almost_quick_sort(xs):
    """Almost a quick sort."""
    # base case
    if xs == []:
        return xs
    # recursive case
    else:
        pivot = xs[0]
        less_than = [x for x in xs[1:] if x <= pivot]
        more_than = [x for x in xs[1:] if x > pivot]
        return almost_quick_sort(less_than) + [pivot] + almost_quick_sort(more_than)

xs = [3,1,4,1,5,9,2,6,5,3,5,9]
print (almost_quick_sort(xs))
[1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 9, 9]

Iterators

Iteratorは値の流れを意味する。何故なら、一度に一つの値しか取得されない(シーケンシャルにしか値を取得しない)からで、メモリをほとんど消費しない。イテレーターの使用は、RAMに収まらない巨大データ・セットを扱うのに非常に役立つ。

# Iterators can be created from sequences with the built-in function iter()

xs = [1,2,3]
x_iter = iter(xs)

print (next(x_iter))
print (next(x_iter))
print (next(x_iter))
print (next(x_iter))
1
2
3
---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-16-7fd2aa3a4ecb> in <module>()
      7 print (next(x_iter))
      8 print (next(x_iter))
----> 9 print (next(x_iter))

StopIteration: 
# Most commonly, iterators are used (automatically) within a for loop
# which terminates when it encouters a StopIteration exception

x_iter = iter(xs)
for x in x_iter:
    print (x)
1
2
3

Generators

Generatorsは、iterator streamsを作成する。

# Functions containing the 'yield' keyword return iterators
# After yielding, the function retains its previous state

def count_down(n):
    for i in range(n, 0, -1):
        yield i
counter = count_down(10)
print (next(counter))
print (next(counter))
for count in counter:
    print (count, end=' ')
10
9
8 7 6 5 4 3 2 1 
# Iterators can also be created with 'generator expressions'
# which can be coded similar to list generators but with parenthesis
# in place of square brackets

xs1 = [x*x for x in range(5)]
print (xs1)

xs2 = (x*x for x in range(5))
print (xs2)

for x in xs2:
    print (x, end=' ')
print
[0, 1, 4, 9, 16]
<generator object <genexpr> at 0x7f6d745fcb48>
0 1 4 9 16 
<function print>
# Iterators can be used for infinte functions

def fib():
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a+b
for i in fib():
    # We must have a stopping condiiton since the generator returns an infinite stream
    if i > 1000:
        break
    print (i, end=' ')
0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 
# Many built-in Python functions return iterators
# including file handlers
# so with the idiom below, you can process a 1 terabyte file line by line
# on your laptop without any problem
# Inn Pyhton 3, map and filter return itnrators, not lists

for line in open('foo.txt'):
    print (line, end='')
Hello
World

Generators and comprehensions

# A geneeratorr expression

print ((x for x in range(10)))

# A list comprehesnnion

print ([x for x in range(10)])

# A set comprehension

print ({x for x in range(10)})

# A dictionary comprehension

print ({x: x for x in range(10)})
<generator object <genexpr> at 0x7f6d785ec830>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}

Utilites – enumerate, zip and the ternary if-else operator

# In many programming languages, loops use an index.
# This is possible in Python, but it is more
# idiomatic to use the enumerate function.

# using and index in a loop
xs = [1,2,3,4]
for i in range(len(xs)):
    print (i, xs[i])
print ()

# using enumerate
for i, x in enumerate(xs):
    print (i, x)
0 1
1 2
2 3
3 4

0 1
1 2
2 3
3 4
# zip is useful when you need to iterate over matched elements of
# multiple lists

xs = [1, 2, 3, 4]
ys = [10, 20, 30, 40]
zs = ['a', 'b', 'c', 'd', 'e']

for x, y, z in zip(xs, ys, zs):
    print (x, y, z)

# Note that zip stops when the shortest list is exhausted
1 10 a
2 20 b
3 30 c
4 40 d
# For list comprehensions, the ternary if-else operator is sometimes very useful

[x**2 if x%2 == 0 else x**3 for x in range(10)]
[0, 1, 4, 27, 16, 125, 36, 343, 64, 729]

Decorators

# Here is a simple decorator to time an arbitrary function

def func_timer(func):
    """Times how long the function took."""
    def f(*args, **kwargs):
        import time
        start = time.time()
        results = func(*args, **kwargs)
        print ("Elapsed: %.2fs" % (time.time() - start))
        return results

    return f
# There is a special shorthand notation for decorating functions

@func_timer
def sleepy(msg, sleep=1.0):
    """Delays a while before answering."""
    import time
    time.sleep(sleep)
    print (msg)

sleepy("Hello", 1.5)
Hello
Elapsed: 1.50s

The operator module

import operator as op
from functools import reduce

# Here is another way to express the sum function
print (reduce(op.add, range(10)))

# The pattern can be generalized
print (reduce(op.mul, range(1, 10)))
45
362880
my_list = [('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)]

# standard sort
print (sorted(my_list))

# return list sorted by element at position 1 (remember Python counts from 0)
print (sorted(my_list, key=op.itemgetter(1)))

# the key argument is quite flexible
print (sorted(my_list, key=lambda x: len(x[0]), reverse=True))
[('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)]
[('a', 1), ('ccc', 2), ('dddd', 3), ('bb', 4)]
[('dddd', 3), ('ccc', 2), ('bb', 4), ('a', 1)]

The functools module

from functools import partial

sum_ = partial(reduce, op.add)
prod_ = partial(reduce, op.mul)
print (sum_([1,2,3,4]))
print (prod_([1,2,3,4]))
10
24
# This is extremely useful to create functions
# that expect a fixed number of arguments

import scipy.stats as stats

def compare(x, y, func):
    """Returne p-value for some appropriate comparison test."""
    return func(x, y)[1]
import numpy as np

x, y = np.random.normal(0, 1, (100,2)).T

print ("p value assuming equal variance    =%.8f" % compare(x, y, stats.ttest_ind))
test = partial(stats.ttest_ind, equal_var=False)
print ("p value not assuming equal variance=%.8f" % compare(x, y, test))
p value assuming equal variance    =0.60440532
p value not assuming equal variance=0.60441159

The itertools module

from itertools import cycle, groupby, islice, permutations, combinations

print (list(islice(cycle('abcd'), 0, 10)))
print ()

animals = sorted(['pig', 'cow', 'giraffe', 'elephant',
                  'dog', 'cat', 'hippo', 'lion', 'tiger'], key=len)
for k, g in groupby(animals, key=len):
    print (k, list(g))
print ()

print ([''.join(p) for p in permutations('abc')])
print ()

print ([list(c) for c in combinations([1,2,3,4], r=2)])
['a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b']

3 ['pig', 'cow', 'dog', 'cat']
4 ['lion']
5 ['hippo', 'tiger']
7 ['giraffe']
8 ['elephant']

['abc', 'acb', 'bac', 'bca', 'cab', 'cba']

[[1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]


参考サイトhttps://people.duke.edu/