#!/usr/bin/env python
import os
import string
import sys
# Utility for handling the intermediate 'secondary memory'
def touchopen(filename, *args, **kwargs):
    """Open ``filename`` as a fresh, empty file and return the file object.

    Any existing file at that path is removed, an empty file is created in
    its place, and the path is then opened with the caller's arguments.

    Args:
        filename: Path of the file to (re)create.
        *args: Positional arguments forwarded unchanged to ``open``
            (typically the mode, e.g. ``'rb+'``).
        **kwargs: Keyword arguments forwarded unchanged to ``open``.

    Returns:
        The file object returned by ``open(filename, *args, **kwargs)``.
    """
    try:
        os.remove(filename)
    except OSError:
        # Nothing to remove (or it could not be removed); the open() calls
        # below will surface any real problem with the path.
        pass
    # "touch" the file so that modes which require an existing file
    # (such as 'rb+') can open it.
    with open(filename, "a"):
        pass
    return open(filename, *args, **kwargs)
# The constrained memory should have no more than 1024 cells
data = []
# We're lucky:
# The stop words are only 556 characters and the lines are all
# less than 80 characters, so we can use that knowledge to
# simplify the problem: we can have the stop words loaded in
# memory while processing one line of the input at a time.
# If these two assumptions didn't hold, the algorithm would
# need to be changed considerably.
# Overall strategy: (PART 1) read the input file, count the
# words, increment/store counts in secondary memory (a file)
# (PART 2) find the 25 most frequent words in secondary memory
# PART 1:
# - read the input file one line at a time
# - filter the characters, normalize to lower case
# - identify words, increment corresponding counts in file
# Set up the constrained "primary memory" (the cells of `data`) and open
# the files used by the rest of the script.
# Load the list of stop words
# NOTE(review): assumes the stop-words file is a single comma-separated
# line; confirm against ../stop_words.txt.
with open('../stop_words.txt') as f:
    data = [f.read(1024).split(',')]  # data[0] holds the stop words
data.append([])     # data[1] is line (max 80 characters)
data.append(None)   # data[2] is index of the start_char of word
data.append(0)      # data[3] is index on characters, i = 0
data.append(False)  # data[4] is flag indicating if word was found
data.append('')     # data[5] is the word
data.append('')     # data[6] is word,NNNN
data.append(0)      # data[7] is frequency
# Open the secondary memory
word_freqs = touchopen('word_freqs', 'rb+')
# Open the input file (path given as the first command-line argument)
f = open(sys.argv[1])
# Loop over input file‘s lines
while True:
data[1] = [f.readline()]
if data[1] == [‘‘]: # end of input file
if data[1][0][len(data[1][0])-1] != ‘\n‘: # If it does not end with \n
data[1][0] = data[1][0] + ‘\n‘ # Add \n
data[2] = None
data[3] = 0
# Loop over characters in the line
for c in data[1][0]: # elimination of symbol c is exercise
if data[2] == None:
if c.isalnum():
# We found the start of a word
data[2] = data[3]
if not c.isalnum():
# We found the end of a word. Process it
data[4] = False
data[5] = data[1][0][data[2]:data[3]].lower()
# Ignore words with len < 2 and stop words
if len(data[5]) >= 2 and data[5] not in data[0]:
# Let‘s see if it already exists
while True:
data[6] = word_freqs.readline().strip()
if data[6] == ‘‘:
data[7] = int(data[6].split(‘‘)[1])
# word no white space
data[6] = data[6].split(‘‘)[0].strip()
if data[5] ==
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 54 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\.gitignore
文件 497 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\01-good-old-times\README.md
文件 4677 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\01-good-old-times\tf-01.py
文件 6144 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\02-go-forth\forth.py
文件 740 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\02-go-forth\README.md
文件 3466 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\02-go-forth\tf-02.py
文件 157 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\03-monolith\README.md
文件 1817 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\03-monolith\tf-03.py
文件 237 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\04-cookbook\README.md
文件 1846 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\04-cookbook\tf-04.py
文件 103 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\05-pipeline\Makefile
文件 307 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\05-pipeline\README.md
文件 1217 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\05-pipeline\tf-05.clj
文件 3561 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\05-pipeline\tf-05.cpp
文件 1978 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\05-pipeline\tf-05.py
文件 248 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\05-pipeline\tf-09.sh
文件 131 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\README.md
文件 431 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06-1.py
文件 663 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06-bm.py
文件 371 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06-pn.py
文件 418 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06.clj
文件 249 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06.py
文件 249 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06.rb
文件 872 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\06-code-golf\tf-06.scala
文件 250 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\07-infinite-mirror\README.md
文件 1380 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\07-infinite-mirror\tf-07.py
文件 614 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\08-kick-forward\README.md
文件 1199 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\08-kick-forward\tf-08.py
文件 725 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\09-the-one\README.md
文件 1008 2017-08-22 16:44 《编程风格:好代码的逻辑》随书代码\09-the-one\tf-04-fold.scala
共有 条评论