array
module in the
Python Standard Library
A
container
is an object such as a
list,
tuple,
dict,
or
set.
A
list
is a heterogeneous container
because it can contain items of different data types.
An
array.array
is a homogeneous container because all its items must be of the same data type.
(Other examples of homogeneous containers are
str
and
bytes.)
#a list: six values of different types held in one container
person = [
    "Smith",
    "John",
    "A",
    10040,
    2122345678,
    True,
]
Why make a heterogeneous
list
instead of six separate variables?
#The same six values, each stored in its own variable.
lastName = "Smith"
firstName = "John"
middleInitial = "A"
zipCode = 10040
phoneNumber = 2122345678
gender = True
Because putting them into a
list
makes it easier to have more than one person:
import sys

#One inner list per person: last name, first name, middle initial,
#zip code, phone number, gender flag.
persons = [
    ["Smith",   "John", "A",  10040, 2122345678, True],
    ["Public",  "John", "Q",  10003, 7182345678, True],
    ["Doe",     "Jane", None, 10003, 9142345678, False],
    ["Sixpack", "Joe",  None, 10003, 9143456789, False]
]

#Item 4 of every inner list is that person's phone number.
phoneNumbers = [row[4] for row in persons]

#Keep the phone numbers whose decimal digits begin with 914.
westchesterPhoneNumbers = []
for number in phoneNumbers:
    if str(number).startswith("914"):
        westchesterPhoneNumbers.append(number)

print(f"westchesterPhoneNumbers = {westchesterPhoneNumbers}") #works in Python 3.7
print(f"{westchesterPhoneNumbers = }") #the = specifier requires Python 3.8
sys.exit(0)
westchesterPhoneNumbers = [9142345678, 9143456789]
westchesterPhoneNumbers = [9142345678, 9143456789]
"How many bytes are occupied by 100 million floats?"
#Print the number of items and the reported size in bytes of a list, a tuple,
#and an array.array that each hold the same n floats.
#NOTE: sys.getsizeof is shallow.  For the list and the tuple it does not
#follow the references to the float objects they contain.
import sys
import array
n = 100_000_000 #underscores in a numeric literal are ignored: 100 million
print("list:")
li = [float(i) for i in range(n)] #a list of floats
print(f"{len(li) = :11,}") #:11, means width 11 with comma separators
print(f"{sys.getsizeof(li) = :11,}")
print(f"{sys.getsizeof(li) / len(li) = }") #reported bytes per item
print()
print("tuple:")
tu = tuple(li) #the same floats in a tuple
print(f"{len(tu) = :11,}")
print(f"{sys.getsizeof(tu) = :11,}")
print()
print("array.array:")
ar = array.array("d", li) #"d" for "double"
print(f"{len(ar) = :11,}")
#buffer_info() returns (address, length); the length is in items, not bytes.
print(f"{ar.buffer_info()[1] = :11,}")
print(f"{ar.itemsize = :11,}") #bytes per item
print(f"{sys.getsizeof(ar) = :11,}")
print()
sys.exit(0)
list:
len(li) = 100,000,000
sys.getsizeof(li) = 859,724,464
sys.getsizeof(li) / len(li) = 8.59724464

tuple:
len(tu) = 100,000,000
sys.getsizeof(tu) = 800,000,040

array.array:
len(ar) = 100,000,000
ar.buffer_info()[1] = 100,000,000
ar.itemsize =           8
sys.getsizeof(ar) = 800,000,064
"How many seconds does it take to slice a list of 100 million floats?"
#Time the slice [::2] of a list of n floats and of an array.array of n
#doubles, then print how many times faster the array.array was.
import sys
import timeit

n = 100_000_000 #how many floats
repeat = 4 #Do the experiment 4 times.
number = 1 #Each experiment consists of 1 execution of the code.

#timeit executes the setup once per experiment, before it starts the clock,
#so building the container is not part of the measurement.
setup = f"l = [float(i) for i in range({n})]"
#Bind the slice to a new name.  Rebinding l itself would discard the original
#list inside the timed statement, and with number > 1 every later execution
#would slice an already-halved list.
code = "half = l[::2]"
s = timeit.repeat(code, setup = setup, repeat = repeat, number = number)
print(f"{s} seconds")
minList = min(s) #The minimum is the run least disturbed by other activity.
print(f"{minList} seconds is the minimum.")
print()

setup = f'import array; a = array.array("d", range({n}))'
code = "half = a[::2]" #Same statement as above, applied to the array.array.
s = timeit.repeat(code, setup = setup, repeat = repeat, number = number)
print(f"{s} seconds")
minArray = min(s)
print(f"{minArray} seconds is the minimum.")
print()

print(f"The array.array is {minList / minArray} times faster than the list.")
sys.exit(0)
[1.9324892540000018, 1.4928774520000019, 1.427949697999999, 1.396348383000003] seconds
1.396348383000003 seconds is the minimum.

[0.360193326000001, 0.34141492199999846, 0.33032098099999985, 0.3401288020000095] seconds
0.33032098099999985 seconds is the minimum.

The array.array is 4.227247021284438 times faster than the list.