"""Report the lines of an e-mail list that occur two or more times.

Run as a script, this reads ``emails_list.txt`` from the current working
directory and writes ``output_file.csv`` with one ``<line>;<count>`` row for
every line that appears at least twice in the input.
"""
from collections import Counter

# Default file locations and report settings used when run as a script.
INPUT_PATH = "emails_list.txt"
OUTPUT_PATH = "output_file.csv"
MIN_OCCURRENCES = 2
DELIMITER = ";"


def count_lines(lines):
    """Count how often each line occurs, ignoring trailing whitespace.

    Args:
        lines: Any iterable of strings (an open text file works).

    Returns:
        A ``Counter`` mapping each right-stripped line to its number of
        occurrences, in order of first appearance.
    """
    # Strip *before* counting. The report strips trailing whitespace from
    # every entry, so counting must use the same normalised key; otherwise
    # "a@b.com\n" and a final "a@b.com" without a newline would be tallied
    # as two different addresses.
    # NOTE: blank lines are counted like any other line (as the key "").
    return Counter(line.rstrip() for line in lines)


def format_rows(counts, min_occurrences=MIN_OCCURRENCES):
    """Build the CSV rows for entries seen at least ``min_occurrences`` times.

    Args:
        counts: Mapping of entry -> occurrence count.
        min_occurrences: Smallest count that is still reported.

    Returns:
        A list of newline-terminated strings such as
        ``"email@address.com;3\\n"``.
    """
    return [
        entry + DELIMITER + str(count) + "\n"
        for entry, count in counts.items()
        if count >= min_occurrences
    ]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read ``input_path`` and write the duplicate report to ``output_path``.

    Args:
        input_path: Text file with one entry per line.
        output_path: Destination file; it is created or overwritten.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # ``with`` guarantees both handles are closed even if an error occurs.
    # The file is iterated directly, so it is never held in memory as a list.
    with open(input_path, "r") as input_file:
        counts = count_lines(input_file)

    # The output is opened only after the input was read successfully, so a
    # missing input file does not leave an empty report behind.
    with open(output_path, "w") as output_file:
        output_file.writelines(format_rows(counts))


if __name__ == "__main__":
    main()