# Visualize the spatial distribution of blocks in the Changsha data set.
data.plot()

# Exploratory data analysis.
# Keep a pristine deep copy; its geometry is restored onto `data` at the end.
D = copy.deepcopy(data)
data.shape   # (3700, 14)
data.head(5)
OBJECTID
lb
Shape_Leng
Shape_Area
idd
OBJECTID_1
idd_1
COUNT
AREA
MEAN
MEAN_1
MEAN_12
MEAN_12_13
geometry
0
1
0
462.698517
7842.037150
1
1
1
634
10138.566790
4.885384
419.828282
583860.130028
1.097882e+06
POLYGON Z ((694423.228 3118300.460 0.000, 6943...
1
2
0
1693.463425
115652.876853
2
2
2
9333
149248.018695
2.210137
49.167335
177414.284438
7.246673e+05
POLYGON Z ((698632.417 3106524.626 0.000, 6987...
2
3
11
1100.566517
76822.340277
3
3
3
6191
99002.944792
2.194382
55.816331
83970.746248
6.581549e+05
POLYGON Z ((698213.537 3106892.204 0.000, 6981...
3
4
0
813.027834
33316.074792
4
4
4
2683
42905.007410
3.220493
60.284221
138651.257203
7.675609e+05
POLYGON Z ((700789.640 3106976.923 0.000, 7006...
4
5
0
931.012175
54895.664372
5
5
5
4426
70778.070368
1.702563
34.498813
145172.013136
7.156062e+05
POLYGON Z ((699126.327 3107255.440 0.000, 6989...
These MEAN fields are composed of attributes such as urban openness, greenness, and population density.
1 2 3 4 5 6
# Label each block from its `lb` attribute:
# lb != 0 marks an inefficient land-use block -> class 0; lb == 0 -> class 1.
data['class'] = [
    1 if data.iloc[i]['lb'] == 0 else 0
    for i in range(data.shape[0])
]
# Prepare the bookkeeping for the adjacency list between the two block types.
Df = collections.defaultdict(list)
A = data[data['class'] == 0]   # the inefficient land-use blocks
B = copy.deepcopy(data)        # every block (independent deep copy)

# isConsidered maps OBJECTID -> [has it been used?, OBJECTID of the closest object];
# every block starts unused and pointing at itself.
isConsidered = collections.defaultdict(list)
for row_idx in range(B.shape[0]):
    oid = B.iloc[row_idx].OBJECTID
    isConsidered[oid] = [False, oid]
# Determine adjacency: record every pair (a in A, b in B) whose geometries
# intersect, in both directions (Df: A -> B neighbours, InvertDf: B -> A).
import alive_progress

Df = collections.defaultdict(list)
InvertDf = collections.defaultdict(list)
with alive_progress.alive_bar(A.shape[0], force_tty=True) as bar:
    for i in range(A.shape[0]):
        source = A.iloc[i]
        for j in range(B.shape[0]):
            target = B.iloc[j]
            if source["geometry"].intersects(target.geometry):
                Df[source.OBJECTID].append(target.OBJECTID)
                InvertDf[target.OBJECTID].append(source.OBJECTID)
        bar()
# Weighting via the Entropy Weight Method. (Note: the Coefficient of Variation Method might be a better alternative.)
def EWM(data):
    """Compute indicator weights with the Entropy Weight Method (EWM).

    Parameters
    ----------
    data : pandas.DataFrame or numpy.ndarray
        Rows are samples, columns are indicators. Columns are assumed
        non-constant (max > min), otherwise the min-max step divides by zero.

    Returns
    -------
    (weights, normalized)
        `weights` is a list with one entropy weight per indicator (they sum
        to 1); `normalized` is the min-max normalized table, taken before
        the probability normalization / clamping used for the entropy.
    """
    # Min-max normalize each indicator to [0, 1].
    t = (data - data.min(axis=0)) / (data.max(axis=0) - data.min(axis=0))
    rt = t  # keep the min-max normalized table to return
    # Turn each column into a probability distribution over the samples.
    t = t / t.sum(axis=0)
    # Clamp tiny values so log(t) below stays finite.
    t[t < 0.0001] = 0.0001
    # Per-indicator information entropy, scaled to [0, 1] by 1/ln(n_samples).
    entropy = -1 / np.log(t.shape[0]) * np.sum(t * np.log(t))
    # BUG FIX: the original did `entropy = [entropy]` whenever entropy was
    # not a plain list. For DataFrame input np.sum returns a Series, so the
    # Series was wrapped as ONE list element and the weight formula collapsed
    # every weight to 1. Flatten to a list of per-indicator scalars instead
    # (a scalar entropy still becomes a one-element list, as before).
    entropy = np.atleast_1d(np.asarray(entropy)).tolist()
    denom = len(entropy) - sum(entropy)
    return [(1 - e) / denom for e in entropy], rt
class us(object):
    """Disjoint-set (union-find) over the rows of a (Geo)DataFrame.

    Elements are keyed by OBJECTID; each set root accumulates the total
    Shape_Area of all blocks merged into it.
    """

    def __init__(self, n):
        self.n = n
        self.init()

    def find(self, x):
        """Return the root of x, with full path compression."""
        root = x
        while self.parents[root] != root:
            root = self.parents[root]
        # Second pass: point every node on the walk directly at the root.
        while self.parents[x] != root:
            self.parents[x], x = root, self.parents[x]
        return root

    def union(self, a, b):
        """Merge the sets of a and b; the surviving root sums both areas."""
        root_a = self.find(a)
        root_b = self.find(b)
        if root_a == root_b:
            return
        self.parents[root_a] = root_b
        self.areas[root_b] += self.areas[root_a]

    def init(self):
        """Make each OBJECTID a singleton set carrying its own Shape_Area."""
        self.parents = {}
        self.areas = {}
        for row_idx in range(self.n.shape[0]):
            row = self.n.iloc[row_idx]
            self.parents[row.OBJECTID] = row.OBJECTID
            self.areas[row.OBJECTID] = row.Shape_Area

    def getArea(self, x):
        """Accumulated area stored at x (meaningful when x is a root)."""
        return self.areas[x]
def InitUS(epsilon=0.9999):
    """Build the initial union-find over the inefficient blocks A.

    Any two blocks whose feature vectors (rows of `nf`, last column dropped)
    score at least `epsilon` under `sim` are merged into the same set.
    """
    u = us(A)
    for i in range(A.shape[0]):
        idxI = A.iloc[i].OBJECTID
        # Feature vector of block i is loop-invariant for the inner loop.
        xi = nf[nf['oid'] == idxI].values.reshape(-1)[:-1]
        for j in range(i + 1, A.shape[0]):
            idxJ = A.iloc[j].OBJECTID
            xj = nf[nf['oid'] == idxJ].values.reshape(-1)[:-1]
            if sim(xi, xj) >= epsilon:
                u.union(idxI, idxJ)
    return u

u = InitUS()
# Iterate over InvertDf to determine the similarity of the source points.
def SourceSim(T: list, epsilon=0.9):
    """Find the heaviest epsilon-similar group among the source blocks T.

    T holds OBJECTIDs of blocks in A. For every anchor T[n], all later
    blocks whose feature vectors are >= epsilon similar to it are grouped
    with it; the group with the largest summed Shape_Area wins.

    Returns (total Shape_Area of the best group, list of its OBJECTIDs).
    """
    best_area = A[A['OBJECTID'] == T[0]]['Shape_Area'].values[0]
    best_group = [T[0]]
    if (count := len(T)) == 1:
        return best_area, best_group
    for n in range(count):
        group_area = A[A['OBJECTID'] == T[n]]['Shape_Area'].values[0]
        group = [T[n]]
        for m in range(n + 1, count):
            xn = nf[nf['oid'] == T[n]].values.reshape(-1)[:-1]
            xm = nf[nf['oid'] == T[m]].values.reshape(-1)[:-1]
            if sim(xn, xm) >= epsilon:
                group_area += A[A['OBJECTID'] == T[m]]['Shape_Area'].values[0]
                group.append(T[m])
        if best_area < group_area:
            best_area = group_area
            best_group = group
    return best_area, best_group
defbone_area_withInvert(alpha,isConsidered): ic=copy.deepcopy(isConsidered) newDf=copy.deepcopy(Df) with alive_progress.alive_bar(len(newDf),force_tty=True) as bar: for i,j in newDf.items(): x=nf[nf['oid']==i].values.reshape(-1)[:-1] for _ in j: y=nf[nf['oid']==_].values.reshape(-1)[:-1] if ic[_][0]:continue S_y=B[B['OBJECTID']==_]['Shape_Area'].values[0] S_x,records=SourceSim(InvertDf[_],0.7) epsilon=0.5if S_y<=(0.5*S_x) else1/(1+np.exp(-alpha*(S_y/S_x))) s=sim(x,y) for k in records: if (S:=sim(nf[nf['oid']==k].values.reshape(-1)[:-1],y))>=epsilon and S>=s:
# Assign the result attributes back onto the full table.
# IsUpdate: False when class == 0 or IC reports the block as never used;
# True otherwise.
data['IsUpdate'] = [
    not ((v := data.iloc[i])['class'] == 0 or IC[v.OBJECTID][0] == False)
    for i in range(data.shape[0])
]
# Source: class-0 blocks point at themselves, others at the closest object
# recorded in IC.
data['Source'] = [
    data.iloc[i].OBJECTID if (v := data.iloc[i])["class"] == 0
    else IC[v.OBJECTID][1]
    for i in range(data.shape[0])
]
# Final: 1 only for blocks with class != 0 that were not updated.
data['Final'] = [
    0 if (data.iloc[i]['class'] == 0 or data.iloc[i]['IsUpdate']) else 1
    for i in range(data.shape[0])
]

# Undo the buffer: restore the original geometries kept in D.
data.geometry = D.geometry

# Save the result as a shapefile.
data.to_file(path + r"\\Recluster_with4features_.shp", encoding='utf-8')