MATSIM
DensityCluster.java
Go to the documentation of this file.
1 /* *********************************************************************** *
2  * project: org.matsim.*
3  * DJCluster.java
4  * *
5  * *********************************************************************** *
6  * *
7  * copyright : (C) 2009 by the members listed in the COPYING, *
8  * LICENSE and WARRANTY file. *
9  * email : info at matsim dot org *
10  * *
11  * *********************************************************************** *
12  * *
13  * This program is free software; you can redistribute it and/or modify *
14  * it under the terms of the GNU General Public License as published by *
15  * the Free Software Foundation; either version 2 of the License, or *
16  * (at your option) any later version. *
17  * See also COPYING, LICENSE and WARRANTY file *
18  * *
19  * *********************************************************************** */
20 
21 package org.matsim.core.network.algorithms.intersectionSimplifier;
22 
23 import java.io.BufferedWriter;
24 import java.io.IOException;
25 import java.util.*;
26 
27 import org.apache.commons.csv.CSVFormat;
28 import org.apache.commons.csv.CSVPrinter;
29 import org.apache.logging.log4j.LogManager;
30 import org.apache.logging.log4j.Logger;
31 import org.matsim.api.core.v01.Coord;
32 import org.matsim.api.core.v01.Id;
37 import org.matsim.core.utils.io.IOUtils;
38 
39 
61 public class DensityCluster {
62  private final List<Node> inputPoints;
63  private final Map<Id<Coord>, ClusterActivity> lostPoints = new TreeMap<>();
65  private final List<Cluster> clusterList;
66  private final static Logger log = LogManager.getLogger(DensityCluster.class);
67  private String delimiter = ",";
68  private final boolean silent;
69  private final String[] csvHeader = new String[]{"clusterId", "lon", "lat", "numberOfActivities"};
70 
75  public DensityCluster(List<Node> nodesToCluster, boolean silent) {
76  this.inputPoints = nodesToCluster;
77 
78  /*TODO Remove later. */
79  int nullCounter = 0;
80  for (Node node : inputPoints) {
81  if (node == null || node.getCoord() == null) {
82  nullCounter++;
83  }
84  }
85  if (nullCounter > 0) {
86  log.warn("In DJCluster: of the " + inputPoints.size() + " points, " + nullCounter + " were null.");
87  }
88 
89  this.clusterList = new ArrayList<>();
90  this.silent = silent;
91  }
92 
93 
100  public void clusterInput(double radius, int minimumPoints) {
101  if (this.inputPoints.isEmpty()) {
102  log.warn("DJCluster.clusterInput() called, but no points to cluster.");
103  } else {
104  if (!silent) {
105  log.info("Clustering input points. This may take a while.");
106  }
107  int clusterIndex = 0;
108  int pointMultiplier = 1;
109  int uPointCounter = 0;
110  int cPointCounter = 0;
111 
112  /*
113  * Determine the extent of the QuadTree.
114  */
115  double xMin = Double.POSITIVE_INFINITY;
116  double yMin = Double.POSITIVE_INFINITY;
117  double xMax = Double.NEGATIVE_INFINITY;
118  double yMax = Double.NEGATIVE_INFINITY;
119 
120  for (Node node : this.inputPoints) {
121  Coord c = node.getCoord();
122  /* TODO Remove if no NullPointerExceptions are thrown. */
123  if (c == null) {
124  log.warn("Coord is null. Number of points in list: " + inputPoints.size());
125  } else {
126  xMin = Math.min(xMin, c.getX());
127  yMin = Math.min(yMin, c.getY());
128  xMax = Math.max(xMax, c.getX());
129  yMax = Math.max(yMax, c.getY());
130  }
131  }
132  /*
133  * Build a new QuadTree, and place each point in the QuadTree as a ClusterActivity.
134  * The geographic coordinates of each point is used as the keys in the QuadTree.
135  * Initially all ClusterPoints will have a NULL reference to its cluster. An
136  * ArrayList of Points is also kept as iterator for unclustered points.
137  */
138  if (!silent) {
139  log.info("Place points in QuadTree.");
140  }
141  quadTree = new QuadTree<>(xMin - 1, yMin - 1, xMax + 1, yMax + 1);
142  List<ClusterActivity> listOfPoints = new ArrayList<>();
143  for (int i = 0; i < this.inputPoints.size(); i++) {
144  double x = inputPoints.get(i).getCoord().getX();
145  double y = inputPoints.get(i).getCoord().getY();
146  ClusterActivity cp = new ClusterActivity(Id.create(i, Coord.class), inputPoints.get(i), null);
147  quadTree.put(x, y, cp);
148  listOfPoints.add(cp);
149  }
150  if (!silent) {
151  log.info("Done placing activities.");
152  }
153 
154  /* We specifically do not use the standard MATSim counter class
155  * because we want to retain the option to run this clustering in
156  * parallel and, consequently, silently without log messages. */
157  int pointCounter = 0;
158  while (pointCounter < listOfPoints.size()) {
159  // Get next point.
160  ClusterActivity p = listOfPoints.get(pointCounter);
161 
162  if (p.getCluster() == null) {
163  // Compute the density-based neighbourhood, N(p), of the point p
164  Collection<ClusterActivity> neighbourhood = quadTree.getDisk(p.getCoord().getX(), p.getCoord().getY(), radius);
165  List<ClusterActivity> uN = new ArrayList<>(neighbourhood.size());
166  List<ClusterActivity> cN = new ArrayList<>(neighbourhood.size());
167  for (ClusterActivity cp : neighbourhood) {
168  if (cp.getCluster() == null) {
169  uN.add(cp);
170  } else {
171  cN.add(cp);
172  }
173  }
174  if (neighbourhood.size() < minimumPoints) {
175  /* Point is considered to be noise.
176  * FIXME Not quite true... it may be incorporated into
177  * another cluster later! (JWJ - Mar '14)
178  */
179 
180  lostPoints.put(p.getId(), p);
181  uPointCounter++;
182  } else if (!cN.isEmpty()) {
183  /*
184  * Merge all the clusters. Use the DensityCluster with the smallest clusterId
185  * value as the remaining DensityCluster.
186  */
187  List<Cluster> localClusters = new ArrayList<>();
188  Cluster smallestCluster = cN.getFirst().getCluster();
189  for (int i = 1; i < cN.size(); i++) {
190  if (Integer.parseInt(cN.get(i).getCluster().getId().toString()) <
191  Integer.parseInt(smallestCluster.getId().toString())) {
192  smallestCluster = cN.get(i).getCluster();
193  }
194  if (!localClusters.contains(cN.get(i).getCluster())) {
195  localClusters.add(cN.get(i).getCluster());
196  }
197  }
198  for (Cluster DigicoreCluster : localClusters) {
199  if (!DigicoreCluster.equals(smallestCluster)) {
200  List<ClusterActivity> thisClusterList = DigicoreCluster.getPoints();
201  for (ClusterActivity clusterActivity : thisClusterList) {
202  // Change the cluster reference of the ClusterActivity.
203  clusterActivity.setCluster(smallestCluster);
204  // Add the ClusterActivity to the new cluster.
205  smallestCluster.getPoints().add(clusterActivity);
206  }
207  }
208  }
209 
210  // Add unclustered points in the neighborhood.
211  for (ClusterActivity cp : uN) {
212  smallestCluster.getPoints().add(cp);
213  cp.setCluster(smallestCluster);
214  cPointCounter++;
215  if (lostPoints.containsKey(cp.getId())) {
216  lostPoints.remove(cp.getId());
217  uPointCounter--;
218  }
219  }
220  } else {
221  // Create a new cluster and add all the points.
222  Cluster newCluster = new Cluster(Id.create(clusterIndex, Cluster.class));
223  clusterIndex++;
224 
225  for (ClusterActivity cp : uN) {
226  cp.setCluster(newCluster);
227  newCluster.getPoints().add(cp);
228  cPointCounter++;
229  if (lostPoints.containsKey(cp.getId())) {
230  lostPoints.remove(cp.getId());
231  uPointCounter--;
232  }
233  }
234  }
235  }
236  pointCounter++;
237  // Report progress
238  if (!silent) {
239  if (pointCounter == pointMultiplier) {
240  log.info(" Points clustered: " + pointCounter);
241  pointMultiplier *= 4;
242  }
243  }
244  }
245 
246 
247  if (!silent) {
248  log.info(" Points clustered: " + pointCounter + " (Done)");
249  int sum = cPointCounter + uPointCounter;
250  log.info("Sum should add up: " + cPointCounter + " (clustered) + "
251  + uPointCounter + " (unclustered) = " + sum);
252 
253  /* Code added for Joubert & Meintjes paper (2014). */
254  log.info("Unclustered points: ");
255  for (ClusterActivity ca : lostPoints.values()) {
256  log.info(String.format(" %.6f,%.6f", ca.getCoord().getX(), ca.getCoord().getY()));
257  }
258  log.info("New way of unclustered points:");
259  log.info(" Number: " + lostPoints.size());
260  }
261 
262  /*
263  * Build the cluster list. Once built, rename the clusterId field so as to
264  * start at '0', and increment accordingly. This allows to directly use
265  * the clusterId field as 'row' and 'column' reference in the 2D matrices
266  * when determining adjacency in Social Network Analysis.
267  */
268  if (!silent) {
269  log.info("Building the cluster list (2 steps)");
270  }
271  Map<Cluster, List<ClusterActivity>> clusterMap = new HashMap<>();
272 
273  if (!silent) {
274  log.info("Step 1 of 2:");
275  log.info("Number of ClusterPoints to process: " + listOfPoints.size());
276  }
277  int cpCounter = 0;
278  int cpMultiplier = 1;
279  for (ClusterActivity ca : listOfPoints) {
280  Cluster theCluster = ca.getCluster();
281  if (theCluster != null) {
282  if (!clusterMap.containsKey(theCluster)) {
283  List<ClusterActivity> newList = new ArrayList<>();
284  clusterMap.put(theCluster, newList);
285  }
286  clusterMap.get(theCluster).add(ca);
287  }
288 
289  if (!silent) {
290  if (++cpCounter == cpMultiplier) {
291  log.info(" ClusterPoints processed: " + cpCounter + " (" + String.format("%3.2f", ((double) cpCounter / (double) listOfPoints.size()) * 100) + "%)");
292  cpMultiplier *= 2;
293  }
294  }
295  }
296  if (!silent) {
297  log.info(" ClusterPoints processed: " + cpCounter + " (Done)");
298  }
299 
300  if (!silent) {
301  log.info("Step 2 of 2:");
302  log.info("Number of clusters to process: " + clusterMap.keySet().size());
303  }
304  int clusterCounter = 0;
305  int clusterMultiplier = 1;
306  int clusterNumber = 0;
307  for (Map.Entry<Cluster, List<ClusterActivity>> e : clusterMap.entrySet()) {
308  Cluster digicoreCluster = e.getKey();
309  List<ClusterActivity> listOfClusterPoints = e.getValue();
310  if (listOfClusterPoints.size() >= minimumPoints) {
311  digicoreCluster.setClusterId(Id.create(clusterNumber++, Cluster.class));
312  clusterNumber++;
313  digicoreCluster.setCenterOfGravity();
314  clusterList.add(digicoreCluster);
315  } else if (!silent) {
316  log.warn(" ... why do we HAVE a cluster with too few points?...");
317  }
318 
319  if (!silent) {
320  if (++clusterCounter == clusterMultiplier) {
321  log.info(" Clusters processed: " + clusterCounter + " (" + String.format("%3.2f", ((double) clusterCounter / (double) clusterMap.keySet().size()) * 100) + "%)");
322  clusterMultiplier *= 2;
323  }
324  }
325  }
326  if (!silent) {
327  log.info(" Clusters processed: " + clusterCounter + " (Done)");
328  log.info("DensityCluster list built.");
329  }
330  }
331 
332  // lost list must be made up of clusters without Id.
333  }
334 
335 
353  public void writeClustersToFile(String filename) {
354 
355  int clusterCount = 0;
356  int clusterMultiplier = 1;
357  int totalClusters = clusterList.size();
358  if (!silent) {
359  log.info("Writing a total of " + totalClusters + " to file.");
360  }
361 
362  try (BufferedWriter output = IOUtils.getBufferedWriter(filename);
363  CSVPrinter printer = new CSVPrinter(output, CSVFormat.Builder.create().setHeader(csvHeader).setDelimiter(delimiter).build())) {
364  for (Cluster c : clusterList) {
365  c.setCenterOfGravity();
366  Coord center = c.getCenterOfGravity();
367  printer.printRecord(
368  c.getId(),
369  String.format(Locale.US, "%.6f", center.getX()),
370  String.format(Locale.US, "%.6f", center.getY()),
371  c.getPoints().size()
372  );
373  clusterCount++;
374  // Report progress
375  if (!silent) {
376  if (clusterCount == clusterMultiplier) {
377  log.info(" Clusters written: " + clusterCount);
378  clusterMultiplier *= 2;
379  }
380  }
381  }
382  if (!silent) {
383  log.info(" Clusters written: " + clusterCount + " (Done)");
384  }
385  } catch (IOException e) {
386  log.error("Could not write cluster to file.", e);
387  }
388  }
389 
390  public List<Cluster> getClusterList() {
391  return clusterList;
392  }
393 
394 
396  return quadTree;
397  }
398 
399 
400  public void setDelimiter(String delimiter) {
401  this.delimiter = delimiter;
402  }
403 
404  public Map<Id<Coord>, ClusterActivity> getLostPoints() {
405  return this.lostPoints;
406  }
407 
408 }
static BufferedWriter getBufferedWriter(URL url, Charset charset, boolean append)
Definition: IOUtils.java:390
static< T > Id< T > create(final long key, final Class< T > type)
Definition: Id.java:68
boolean put(final double x, final double y, final T value)
Definition: QuadTree.java:86
Collection< T > getDisk(final double x, final double y, final double distance)
Definition: QuadTree.java:142