# -- coding: utf-8 --
import os
import numpy as np
import pandas as pd
import time
# Location and names of the files the script works with.
WORK_DIR = 'G:\\GISworkspace\\Rprocess'
INPUT_CSV = 'level3_river_clipV6.csv'
OUTPUT_CSV = 'Cname_10km.csv'
NAME_COLUMN = 'NAME_CH'  # column holding the place names to be counted


def count_place_names(names):
    """Return one row per distinct place name with its occurrence count.

    Args:
        names: A pandas Series (or any sequence) of place names.

    Returns:
        A DataFrame with the columns ``name`` and ``num``. Each distinct
        name appears once, on the index label of its first occurrence,
        and ``num`` is how many times that name occurs in ``names``.

    Missing names (NaN/None) are skipped. The earlier pairwise loop could
    never match a missing value against itself, so such rows ended up with
    their row number as the "name" and a count of 0.
    """
    names = pd.Series(names).dropna()
    # One hash-based pass replaces the O(n^2) pairwise comparison and the
    # chained assignment (space['name'][i] = ...), which pandas does not
    # guarantee to write back into the frame.
    occurrences = names.map(names.value_counts())
    counted = pd.DataFrame({'name': names, 'num': occurrences})
    # keep='first' keeps only the first row on which each name appears.
    return counted.drop_duplicates('name', keep='first')


def main():
    """Count the place names in INPUT_CSV and write the table to OUTPUT_CSV."""
    start = time.perf_counter()  # used to report the program running time
    os.chdir(WORK_DIR)
    # Open through a context manager so the handle is closed even when
    # parsing fails; open() keeps the platform's default text encoding.
    with open(INPUT_CSV) as handle:
        df = pd.read_csv(handle)
    result = count_place_names(df[NAME_COLUMN])
    result.to_csv(OUTPUT_CSV)
    # Historical timing of the pairwise-loop version: R 4.7 min, Python 31 s.
    print(time.perf_counter() - start)


if __name__ == '__main__':
    main()