See the Unix command
uniq -c.
"""
Count how many times each score appears.
"""
import sys

scores = [10, 9, 10, 8, 11, 10, 12, 8, 11]   #The most frequent score is 10.

tally = {}   #An ordinary dict: each key is a score, each value is how many times we have seen it.

for s in scores:
    tally[s] = tally.get(s, 0) + 1   #A score we have not seen before counts as 0.

for s, howMany in sorted(tally.items()):   #in order of increasing score
    print(f"{s:2} {howMany}")

sys.exit(0)
8 2 9 1 10 3 11 2 12 1
Usually you put keys and values into a
dictionary.
But there is a special type of
dictionary
called a
collections.Counter.
(In the language of Object-Oriented Programming, we say that class
collections.Counter
is a subclass of class
dict.)
You put keys into a
collections.Counter,
and the
collections.Counter
provides the values for you.
"""
Count how many times each score appears.
"""
import sys
import collections

scores = [10, 9, 10, 8, 11, 10, 12, 8, 11]   #The most frequent score is 10.
counter = collections.Counter(scores)   #Each key is a score, each value is its number of occurrences.

print("In original key order:")
for score in counter:   #Looping through a Counter yields its keys, just like a dict.
    print(f"{score:2} {counter[score]}")
print()

print("In order of decreasing frequency:")
byFrequency = counter.most_common()   #a list of (score, count) tuples; also try reversed(counter.most_common())
for score, count in byFrequency:
    print(f"{score:2} {count}")
print()

print("In order of increasing score:")
for score, count in sorted(counter.items()):   #Also try sorted(counter.items(), reverse = True)
    print(f"{score:2} {count}")

sys.exit(0)
In original key order: 10 3 9 1 8 2 11 2 12 1 In order of decreasing frequency: 10 3 8 2 11 2 9 1 12 1 In order of increasing score: 8 2 9 1 10 3 11 2 12 1
import collections

#Print each class in the method resolution order of collections.Counter, one per line.
for ancestor in collections.Counter.__mro__:   #ancestor is a type
    print(ancestor)
<class 'collections.Counter'> <class 'dict'> <class 'object'>
The following program produces exit status 0 if every letter is present, exit status 1 otherwise. See Intersect for another way to find the missing letters.
"""
Count how many times each letter appears. Is every letter present?
"""
import sys
import string
import collections

s = "Pack my box with five dozen liquor jugs."   #pangram

#Prep the patient for surgery: keep only the letters, then fold them to lowercase.
letters = "".join(filter(str.isalpha, s)).lower()   #letters is a string
counter = collections.Counter(letters)

for letter, count in sorted(counter.items()):   #List the lowercase letters in alphabetical order.
    print(letter, count)

#A Counter gives 0 for a key it has never seen, so a count of 0 means the letter is absent.
missing = [c for c in string.ascii_lowercase if counter[c] == 0]

for c in missing:
    print(f'"{c}" is missing!')

sys.exit(1 if missing else 0)   #exit status 0 only if every letter is present
a 1 b 1 c 1 d 1 e 2 f 1 g 1 h 1 i 3 j 1 k 1 l 1 m 1 n 1 o 3 p 1 q 1 r 1 s 1 t 1 u 2 v 1 w 1 x 1 y 1 z 1
"""
Count how many times each state was entered.
The counts automatically start at 0.
"""
import sys
import collections

entries = ["NY", "NY", "NJ", "CT", "NJ", "NY"]   #the states, in the order in which they were entered

counter = collections.Counter()   #Starts out empty.

for state in entries:
    counter[state] += 1   #No KeyError the first time: a missing key automatically starts at 0.

for state, count in sorted(counter.items()):   #alphabetical order
    print(state, count)

sys.exit(0)
CT 1 NJ 2 NY 3
An example of counting manually.
"""
List the CAMIS number, name, and number of mouse violations
of the 10 restaurants with the largest number of mouse violations.
"""
import sys
import io
import csv   #Comma-Separated Values
import urllib.error   #Imported explicitly because we catch urllib.error.URLError below.
import urllib.request
import collections

#Database is at
#https://data.cityofnewyork.us/Health/DOHMH-New-York-City-Restaurant-Inspection-Results/43nn-pn8j
url = "https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv"

#The violation description we are looking for.
mouseViolation = "Evidence of mice or live mice present in facility's food and/or non-food areas."

#Positions (starting at 0) of the fields we need in each row.
camisColumn = 0          #the id number of the restaurant
nameColumn = 1           #the name of the restaurant
descriptionColumn = 11   #the text we search for the mouse violation

try:
    #The with statement closes the connection even if the read fails partway through.
    with urllib.request.urlopen(url) as fileFromUrl:
        sequenceOfBytes = fileFromUrl.read()   #Slurp whole file into one big sequenceOfBytes.
except urllib.error.URLError as error:
    print(error, file = sys.stderr)
    sys.exit(1)

try:
    s = sequenceOfBytes.decode("utf-8")   #s is a string
except UnicodeError as error:
    print(error, file = sys.stderr)
    sys.exit(1)

#Let csv.reader find the end of each row by itself.  Calling s.splitlines() first would
#break a row in two wherever a quoted field contains a line break.
lines = csv.reader(io.StringIO(s, newline = ""))   #lines is an iterator; each item is a list of strings.

#Two dictionaries that let you look up a CAMIS number and find the corresponding ...
dba = {}   #name of the restaurant
numberOfViolations = collections.Counter()   #number of mice violations for that restaurant

for line in lines:   #line is a list of strings
    if len(line) <= descriptionColumn:
        continue   #Skip a blank or short row instead of raising IndexError.
    if mouseViolation in line[descriptionColumn]:
        camis = int(line[camisColumn])
        dba[camis] = line[nameColumn]   #Record the name of this restaurant.
        numberOfViolations[camis] += 1   #Tally an additional violation.  Automatically starts at 0.

for camis, n in numberOfViolations.most_common(10):   #the 10 worst offenders, starting with the worst
    print(f"{camis:8} {n:2} {dba[camis]}")

sys.exit(0)
50016943 13 EL NUEVO ROBLE BILLIARDS 50046623 12 COLD STONE CREAMERY 40423819 12 ALFONSO'S PASTRY SHOPPE 50015263 12 LA POSADA MEXICAN FOOD 41259444 12 COCO ROCO RESTAURANT 50058969 11 CAFE CREOLE 41642251 11 LITTLE CAESARS 50035603 11 AUTHENTIC SZECHUAN 41407999 11 BERMUDEZ BAKERY 50038412 11 TWIN SISTER PAN
After creating lines,
change the rest of the above program to the following.
What are the tradeoffs?
#The violation description we are looking for.
mouseViolation = "Evidence of mice or live mice present in facility's food and/or non-food areas."

#One tuple per mouse violation, so a restaurant with several violations appears several times.
listOfTuples = [
    (int(row[0]), row[1])   #tuple containing CAMIS number and name of restaurant
    for row in lines
    if mouseViolation in row[11]
]

tally = collections.Counter(listOfTuples)   #Each key is a tuple, each value is how many times it appears.

for (camis, dba), n in tally.most_common(10):   #Start with worst offender.
    print(f"{camis:8} {n:2} {dba}")

sys.exit(0)
50016943 13 EL NUEVO ROBLE BILLIARDS 50046623 12 COLD STONE CREAMERY 40423819 12 ALFONSO'S PASTRY SHOPPE 50015263 12 LA POSADA MEXICAN FOOD 41259444 12 COCO ROCO RESTAURANT 50058969 11 CAFE CREOLE 41642251 11 LITTLE CAESARS 50035603 11 AUTHENTIC SZECHUAN 41407999 11 BERMUDEZ BAKERY 50038412 11 TWIN SISTER PAN
What I really want to create is a
dictionary
in which each key is a CAMIS number,
and each value is a
tuple
containing the restaurant’s name and number of mouse violations.
Then I’d like to treat this
dictionary
as a
collections.Counter
based on the number of mouse violations
(and ignoring the name of the restaurant).
Is this possible?
import sys
import pandas as pd

scores = [10, 9, 10, 8, 11, 10, 12, 8, 11]   #The most frequent score is 10.
series = pd.Series(scores)   #series is a pandas Series.

print("The original series:")
print(series)
print()

#counts is a Series whose index holds the scores and whose values hold the number of occurrences.
#Compute it once and use it again at the end of the program.
counts = series.value_counts(sort = True)

print("In order of decreasing frequency:")
print(counts)   #also try print(series.value_counts(sort = True, ascending = True))
print()

print("In order of increasing score:")
#sort = False only turns off sorting by frequency; it does not put the scores in order.
#Sort the index (the scores) explicitly so the output agrees with the heading.
print(series.value_counts(sort = False).sort_index())   #also try print(series.value_counts(sort = False).sort_index(ascending = False))
print()

mostFrequentScore = counts.index[0]   #counts is in order of decreasing frequency, so the first entry wins.
numberOfTimes = counts.iloc[0]
print(f"The most frequently occurring score is {mostFrequentScore}.")
print(f"It occurs {numberOfTimes} times.")

sys.exit(0)
The original series: 0 10 1 9 2 10 3 8 4 11 5 10 6 12 7 8 8 11 dtype: int64 In order of decreasing frequency: 10 3 11 2 8 2 12 1 9 1 dtype: int64 In order of increasing score: 8 2 9 1 10 3 11 2 12 1 dtype: int64 The most frequently occurring score is 10. It occurs 3 times.