44# '
55# ' @param dpp The output of [dynamic_per_pixel()]
66# ' @param kMax Tha maximum number of clusters to be tested
7- # ' @param explanation_objective The a threshold between 0 and 1 for the ratio
8- # ' between Within-Cluster sum of squares and Between-Cluster sum of squares.
7+ # ' @param minimum_clusterSize The minimum size of a cluster allowed. By default
8+ # ' 1% of all moving average pixels, need to be in a cluster.
99# ' @param plot_result Logical
1010# ' @param correlate_first If TRUE, Pearson correlation between pixels will be
1111# ' used for clustering instead of Index values. This results in a clustering
2828# ' @export
2929# '
3030best_nk <- function (
31- dpp , kMax = 10 , explanation_objective = 0.98 , plot_result = TRUE , correlate_first = FALSE
31+ dpp , kMax = 10 , minimum_clusterSize = 0.01 , plot_result = TRUE , correlate_first = FALSE
3232){
3333 y <- prepare_for_clustering(
3434 moving_averages_matrix = dpp $ moving_averages [,- 1 ],
3535 correlate_first = correlate_first ,
3636 maxPixels = 10000
3737 )
3838
39- wcss <- c()
40- withinBetween <- 0
4139 i <- 1
42- while (withinBetween < explanation_objective ){
40+ kmeansOutput <- kmeans(t(y ), centers = i , iter.max = 20 , nstart = 2 )
41+ wcss <- withinBetween <- kmeansOutput $ betweenss / kmeansOutput $ totss
42+ while (all(kmeansOutput $ size > minimum_clusterSize * ncol(y ))){
4343 if (i > kMax ){
4444 break
4545 }
46+ i <- i + 1
4647 cat(paste0(" Calculation of " , i , ifelse(i == 1 , " cluster" , " clusters" ), " ... \n " ))
47-
4848 kmeansOutput <- kmeans(t(y ), centers = i , iter.max = 20 , nstart = 2 )
49-
49+
5050 withinBetween <- kmeansOutput $ betweenss / kmeansOutput $ totss
5151 wcss <- c(wcss , withinBetween )
52- i <- i + 1
5352 }
5453
5554 par(mar = c(4.1 , 4.1 , 4.1 , 2.1 ))
@@ -65,7 +64,6 @@ best_nk <- function(
6564 best_nCluster <- length(wcss )
6665
6766 abline(v = best_nCluster )
68- abline(h = explanation_objective * 100 , lty = " dotted" )
6967
7068 tp <- ifelse(
7169 abs(best_nCluster - par(" xaxp" )[2 ]) < abs(best_nCluster - par(" xaxp" )[1 ]),
@@ -107,7 +105,7 @@ pixel_clusters <- function(
107105 keep <- 1 : 365
108106 if (! whole_dynamic ){
109107 proportionToRemove <- 100000 / ncol(dpp $ moving_averages )
110- i_d <- round (1 / proportionToRemove )
108+ i_d <- ceiling (1 / proportionToRemove )
111109 if (i_d > 10 ){
112110 i_d <- 10
113111 }
@@ -149,10 +147,17 @@ pixel_clusters <- function(
149147# ' rows of the moving averages of one index as created by [dynamic_per_pixel()]
150148# ' @param correlate_first If TRUE, Pearson correlation between pixels will be
151149# ' used for clustering instead of Index values. This results in a clustering
152- # ' based on different shapes of the dynamic.
150+ # ' based on different shapes of the dynamic.
153151# ' @param maxPixels The maximum numbers of pixel used for cluster analysis.
154152# ' Leave NULL to use all pixels.
155153# '
154+ # ' @details
155+ # ' If correlate first, pixels are identified as outliers and removed if there
156+ # ' not at least 5% other pixels that correlate at least 0.95. For example, given
157+ # ' 1000 correlated pixels, the evaluated pixels is not an outlier if it
158+ # ' correlates to >= 50 pixels >= 0.95.
159+ # '
160+ # '
156161# ' @return A matrix either with data per pixels or correlation between pixels
157162# ' used for following cluster analysis
158163# '
@@ -189,6 +194,21 @@ prepare_for_clustering <- function(
189194 }
190195 cat(paste0(" correlation of " , ncol(ts_table ), " pixels ... \n " ))
191196 y <- t(cor(x = ts_table , y = ts_reference , use = " pairwise.complete.obs" ))
197+
198+
199+ # remove pixels that have a low correlation to almost "all" others
200+ # highCor <- apply(y, 2, quantile, p = 0.95)
201+ # )
202+ # cat(paste0(
203+ # sum(highCor < 0.95), " pixels of ", ncol(y), " pixels identified as ",
204+ # "outlier and removed ... \n")
205+ # )
206+ #
207+ # # maxCor <- apply(y, 2, max)
208+ # # maxCor >= 0.99
209+ #
210+ # sum(which(maxCor < 0.99) %in% which(highCor < 0.95))
211+ # y <- round(y[,highCor >= 0.95], 5)
192212 y <- round(y , 5 )
193213 } else {
194214 y <- ts_table
0 commit comments