
Commit ef05447

Merge pull request #532 from wsp-sag/ft_vis_1
Visualization Pull Request
2 parents c5f437e + 8110573 commit ef05447

39 files changed

Lines changed: 4375 additions & 32 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -72,3 +72,4 @@ _test_est
 *_local/
 *_local.*
 
+**/output/

README.md

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 ActivitySim
 ===========
 
-[![Build Status](https://travis-ci.org/ActivitySim/activitysim.svg?branch=master)](https://travis-ci.org/ActivitySim/activitysim)[![Coverage Status](https://coveralls.io/repos/github/ActivitySim/activitysim/badge.svg?branch=master)](https://coveralls.io/github/ActivitySim/activitysim?branch=master)
+[![Build Status](https://travis-ci.com/ActivitySim/activitysim.svg?branch=master)](https://travis-ci.org/github/ActivitySim/activitysim)[![Coverage Status](https://coveralls.io/repos/github/ActivitySim/activitysim/badge.svg?branch=master)](https://coveralls.io/github/ActivitySim/activitysim?branch=master)
 
 The mission of the ActivitySim project is to create and maintain advanced, open-source,
 activity-based travel behavior modeling software based on best software development
@@ -15,4 +15,4 @@ and benefit from contributions of other agency partners.
 
 ## Documentation
 
-https://activitysim.github.io/activitysim
+https://activitysim.github.io/activitysim
Lines changed: 335 additions & 24 deletions
@@ -1,43 +1,354 @@
 # ActivitySim
 # See full license in LICENSE.txt.
 import logging
-import sys
+import os
+
+import numpy as np
 import pandas as pd
+from activitysim.abm.models.trip_matrices import annotate_trips
+from activitysim.core import config, expressions, inject, pipeline
+
+logger = logging.getLogger(__name__)
 
-from activitysim.core import pipeline
-from activitysim.core import inject
-from activitysim.core import config
 
-from activitysim.core.config import setting
+def wrap_skims(
+    network_los: pipeline.Pipeline,
+    trips_merged: pd.DataFrame
+) -> dict[str, object]:
+    """
+    Retrieve skim wrappers for merged trips.
 
-logger = logging.getLogger(__name__)
+    For each record in `trips_merged`, retrieve skim wrappers for the appropriate time of day.
+
+    Returns a dictionary of skim wrappers that are available for use in expressions defined
+    in `summarize_preprocessor.csv`.
+    """
+    skim_dict = network_los.get_default_skim_dict()
+
+    trips_merged['start_tour_period'] = network_los.skim_time_period_label(
+        trips_merged['start']
+    )
+    trips_merged['end_tour_period'] = network_los.skim_time_period_label(
+        trips_merged['end']
+    )
+    trips_merged['trip_period'] = network_los.skim_time_period_label(
+        trips_merged['depart']
+    )
+
+    tour_odt_skim_stack_wrapper = skim_dict.wrap_3d(
+        orig_key='origin_tour',
+        dest_key='destination_tour',
+        dim3_key='start_tour_period',
+    )
+    tour_dot_skim_stack_wrapper = skim_dict.wrap_3d(
+        orig_key='destination_tour', dest_key='origin_tour', dim3_key='end_tour_period'
+    )
+    trip_odt_skim_stack_wrapper = skim_dict.wrap_3d(
+        orig_key='origin_trip', dest_key='destination_trip', dim3_key='trip_period'
+    )
+
+    tour_od_skim_stack_wrapper = skim_dict.wrap('origin_tour', 'destination_tour')
+    trip_od_skim_stack_wrapper = skim_dict.wrap('origin_trip', 'destination_trip')
+
+    return {
+        "tour_odt_skims": tour_odt_skim_stack_wrapper,
+        "tour_dot_skims": tour_dot_skim_stack_wrapper,
+        "trip_odt_skims": trip_odt_skim_stack_wrapper,
+        "tour_od_skims": tour_od_skim_stack_wrapper,
+        "trip_od_skims": trip_od_skim_stack_wrapper,
+    }
+
+
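
The skim wrappers returned above become available by name in preprocessor expressions. A minimal hypothetical row for `summarize_preprocessor.csv`, following the usual ActivitySim `Description,Target,Expression` preprocessor layout (the `DIST` skim core name is an assumption, not something defined in this PR):

Description,Target,Expression
# 'DIST' is an assumed distance matrix in the loaded skims
Trip distance,trip_dist,trip_od_skims['DIST']
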
+DEFAULT_BIN_LABEL_FORMAT = "{left:,.2f} - {right:,.2f}"
+
+
+def construct_bin_labels(bins: pd.Series, label_format: str) -> pd.Series:
+    """
+    Construct bin label strings based on the intervals (pd.Interval) in `bins`.
+
+    `label_format` is an f-string format that can reference the following variables:
+    - 'left': Bin minimum
+    - 'right': Bin maximum
+    - 'mid': Bin center
+    - 'rank': Bin rank (lowest to highest)
+
+    For example: '{left:,.2f} - {right:,.2f}' might yield '0.00 - 1.00'
+    """
+    left = bins.apply(lambda x: x.left)
+    mid = bins.apply(lambda x: x.mid)
+    right = bins.apply(lambda x: x.right)
+    # Get integer ranks of bins (e.g., 1st, 2nd ... nth quantile)
+    rank = mid.map(
+        {
+            x: sorted(mid.unique().tolist()).index(x) + 1 if pd.notnull(x) else np.nan
+            for x in mid.unique()
+        },
+        na_action='ignore',
+    )
+
+    def construct_label(label_format, bounds_dict):
+        bounds_dict = {
+            x: bound for x, bound in bounds_dict.items() if x in label_format
+        }
+        return label_format.format(**bounds_dict)
+
+    labels = pd.Series(
+        [
+            construct_label(label_format, {'left': lt, 'mid': md, 'right': rt, 'rank': rk})
+            for lt, md, rt, rk in zip(left, mid, right, rank)
+        ],
+        index=bins.index,
+    )
+    # Convert to numeric if possible
+    labels = pd.to_numeric(labels, errors='ignore')
+    return labels
+
+
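
As a quick illustration of `construct_bin_labels`, a toy sketch (the data values are made up and the printed labels are approximate):

import pandas as pd

data = pd.Series([1.0, 2.5, 4.0, 9.0])
intervals = pd.cut(data, 2)  # two equal-width pd.Interval bins
construct_bin_labels(intervals, '{rank}: {left:,.2f} - {right:,.2f}')
# -> roughly '1: 0.99 - 5.00' for the lower bin, '2: 5.00 - 9.00' for the upper
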
+def quantiles(
+    data: pd.Series,
+    bins: int,
+    label_format: str = DEFAULT_BIN_LABEL_FORMAT
+) -> pd.Series:
+    """
+    Construct quantiles from a Series given a number of bins.
+
+    For example: set bins = 5 to construct quintiles.
+
+    data: Input Series
+    bins: Number of bins
+    label_format: f-string format for bin labels
+        Bins are labeled with 'min - max' ranges by default.
+
+    Returns a Series indexed by labels
+    """
+    vals = data.sort_values()
+    # qcut a ranking instead of raw values to deal with high frequencies of the same value
+    # (e.g., many 0 values) that may span multiple bins
+    ranks = vals.rank(method='first')
+    bins = pd.qcut(ranks, bins, duplicates='drop')
+    bins = construct_bin_labels(bins, label_format)
+    return bins
+
+
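
The ranking step matters when one value dominates the data; a toy sketch of the difference (values are illustrative):

import pandas as pd

vals = pd.Series([0, 0, 0, 0, 0, 0, 1, 2, 3, 4])
pd.qcut(vals, 5, duplicates='drop')    # tied quantile edges are dropped, yielding fewer than 5 bins
pd.qcut(vals.rank(method='first'), 5)  # ranks are unique, so all 5 bins survive
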
+def spaced_intervals(
+    data: pd.Series,
+    lower_bound: float,
+    interval: float,
+    label_format: str = DEFAULT_BIN_LABEL_FORMAT,
+) -> pd.Series:
+    """
+    Construct evenly-spaced intervals from a Series given a starting value and bin size.
+
+    data: Input Series
+    lower_bound: Minimum value of lowest bin, or 'min' to start at the data minimum
+    interval: Bin spacing above the `lower_bound`
+    label_format: f-string format for bin labels
+        Bins are labeled with 'min - max' ranges by default.
+
+    Returns a Series indexed by labels
+    """
+    if lower_bound == 'min':
+        lower_bound = data.min()
+    breaks = np.arange(lower_bound, data.max() + interval, interval)
+    bins = pd.cut(data, breaks, include_lowest=True)
+    bins = construct_bin_labels(bins, label_format)
+    return bins
+
+
+def equal_intervals(
+    data: pd.Series,
+    bins: int,
+    label_format: str = DEFAULT_BIN_LABEL_FORMAT
+) -> pd.Series:
+    """
+    Construct equally-spaced intervals across the entire range of a Series.
+
+    data: Input Series
+    bins: Number of bins
+    label_format: f-string format for bin labels
+        Bins are labeled with 'min - max' ranges by default.
+
+    Returns a Series indexed by labels
+    """
+    bins = pd.cut(data, bins, include_lowest=True)
+    bins = construct_bin_labels(bins, label_format)
+    return bins
+
+
+def manual_breaks(
+    data: pd.Series,
+    bin_breaks: list,
+    labels: list = None,
+    label_format: str = DEFAULT_BIN_LABEL_FORMAT
+) -> pd.Series:
+    """
+    Classify numeric data in a Pandas Series into manually-defined bins.
+
+    data: Input Series
+    bin_breaks: Break points between bins
+    labels: Manually-defined labels for each bin (`len(labels)` == `len(bin_breaks) - 1`)
+    label_format: f-string format for bin labels if not defined by `labels`
+        Bins are labeled with 'min - max' ranges by default.
+
+    Returns a Series indexed by labels
+    """
+    if isinstance(labels, list):
+        return pd.cut(data, bin_breaks, labels=labels, include_lowest=True)
+    else:
+        bins = pd.cut(data, bin_breaks, include_lowest=True)
+        bins = construct_bin_labels(bins, label_format)
+        return bins
 
 
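
A short sketch of how the three interval helpers behave on a toy Series (values, breaks, and labels are hypothetical):

import pandas as pd

dist = pd.Series([0.3, 1.2, 2.8, 5.5, 11.0])
spaced_intervals(dist, lower_bound=0, interval=5)  # bins of width 5 starting at 0
equal_intervals(dist, bins=2)                      # two equal-width bins across the range
manual_breaks(dist, [0, 1, 3, 15], labels=['short', 'medium', 'long'])
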
 @inject.step()
-def write_summaries(output_dir):
+def summarize(
+    network_los: pipeline.Pipeline,
+    persons: pd.DataFrame,
+    persons_merged: pd.DataFrame,
+    households: pd.DataFrame,
+    households_merged: pd.DataFrame,
+    trips: pd.DataFrame,
+    tours: pd.DataFrame,
+    tours_merged: pd.DataFrame,
+    land_use: pd.DataFrame,
+):
+    """
+    A standard model that uses expression files to summarize pipeline tables for visualization.
+
+    Summaries are configured in `summarize.yaml`, including specification of the
+    expression file (`summarize.csv` by default).
+
+    Columns in pipeline tables can also be sliced and aggregated prior to summarization.
+    This preprocessing is configured in `summarize.yaml`.
+
+    Outputs a separate CSV summary file for each expression;
+    outputs starting with '_' are saved as temporary local variables.
+    """
+    trace_label = 'summarize'
+    model_settings_file_name = 'summarize.yaml'
+    model_settings = config.read_model_settings(model_settings_file_name)
+
+    output_location = (
+        model_settings['OUTPUT'] if 'OUTPUT' in model_settings else 'summaries'
+    )
+    os.makedirs(config.output_file_path(output_location), exist_ok=True)
+
+    spec = pd.read_csv(
+        config.config_file_path(model_settings['SPECIFICATION']), comment='#'
+    )
+
+    # Load dataframes from pipeline
+    persons = persons.to_frame()
+    persons_merged = persons_merged.to_frame()
+    households = households.to_frame()
+    households_merged = households_merged.to_frame()
+    trips = trips.to_frame()
+    tours = tours.to_frame()
+    tours_merged = tours_merged.to_frame()
+    land_use = land_use.to_frame()
+
+    # - trips_merged - merge trips and tours_merged
+    trips_merged = pd.merge(
+        trips,
+        tours_merged.drop(columns=['person_id', 'household_id']),
+        left_on='tour_id',
+        right_index=True,
+        suffixes=['_trip', '_tour'],
+        how="left",
+    )
+
+    # Add dataframes as local variables
+    locals_d = {
+        'persons': persons,
+        'persons_merged': persons_merged,
+        'households': households,
+        'households_merged': households_merged,
+        'trips': trips,
+        'trips_merged': trips_merged,
+        'tours': tours,
+        'tours_merged': tours_merged,
+        'land_use': land_use,
+    }
+
+    skims = wrap_skims(network_los, trips_merged)
+
+    # Annotate trips_merged
+    expressions.annotate_preprocessors(
+        trips_merged, locals_d, skims, model_settings, 'summarize'
+    )
+
+    for table_name, df in locals_d.items():
+        if table_name in model_settings:
+
+            meta = model_settings[table_name]
+            df = eval(table_name)
+
+            if 'AGGREGATE' in meta and meta['AGGREGATE']:
+                for agg in meta['AGGREGATE']:
+                    assert set(('column', 'label', 'map')) <= agg.keys()
+                    df[agg['label']] = (
+                        df[agg['column']].map(agg['map']).fillna(df[agg['column']])
+                    )
+
+            if 'BIN' in meta and meta['BIN']:
+                for slicer in meta['BIN']:
+                    if slicer['type'] == 'manual_breaks':
+                        df[slicer['label']] = manual_breaks(
+                            df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
+                        )
+
+                    elif slicer['type'] == 'quantiles':
+                        df[slicer['label']] = quantiles(
+                            df[slicer['column']], slicer['bins'], slicer['label_format']
+                        )
+
+                    elif slicer['type'] == 'spaced_intervals':
+                        df[slicer['label']] = spaced_intervals(
+                            df[slicer['column']],
+                            slicer['lower_bound'],
+                            slicer['interval'],
+                            slicer['label_format'],
+                        )
+
+                    elif slicer['type'] == 'equal_intervals':
+                        df[slicer['label']] = equal_intervals(
+                            df[slicer['column']], slicer['bins'], slicer['label_format']
+                        )
 
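
For reference, a hypothetical `summarize.yaml` sketch covering the keys this step reads (`OUTPUT`, `SPECIFICATION`, `EXPORT_PIPELINE_TABLES`, and per-table `AGGREGATE`/`BIN` entries); the table, column, map, and break values below are illustrative, not taken from the PR:

OUTPUT: summaries
SPECIFICATION: summarize.csv
EXPORT_PIPELINE_TABLES: True

trips_merged:
  AGGREGATE:
    - column: trip_mode        # assumed column name
      label: mode_group
      map:
        DRIVEALONEFREE: auto   # assumed category values
        WALK: active
  BIN:
    - type: quantiles
      column: distance         # assumed column name
      label: distance_quintile
      bins: 5
      label_format: '{rank}'
    - type: manual_breaks
      column: distance
      label: distance_class
      bin_breaks: [0, 1, 5, 50]
      bin_labels: [short, medium, long]
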
-    summary_settings_name = 'output_summaries'
-    summary_file_name = 'summaries.txt'
+    # Output pipeline tables for expression development
+    if model_settings['EXPORT_PIPELINE_TABLES'] is True:
+        pipeline_table_dir = os.path.join(output_location, 'pipeline_tables')
+        os.makedirs(config.output_file_path(pipeline_table_dir), exist_ok=True)
+        for name, df in locals_d.items():
+            df.to_csv(config.output_file_path(os.path.join(pipeline_table_dir, f'{name}.csv')))
 
-    summary_settings = setting(summary_settings_name)
+    # Add classification functions to locals
+    locals_d.update(
+        {
+            'quantiles': quantiles,
+            'spaced_intervals': spaced_intervals,
+            'equal_intervals': equal_intervals,
+            'manual_breaks': manual_breaks,
+        }
+    )
 
-    if summary_settings is None:
-        logger.info("No {summary_settings_name} specified in settings file. Nothing to write.")
-        return
+    for i, row in spec.iterrows():
 
-    summary_dict = summary_settings
+        out_file = row['Output']
+        expr = row['Expression']
 
-    mode = 'wb' if sys.version_info < (3,) else 'w'
-    with open(config.output_file_path(summary_file_name), mode) as output_file:
+        # Save temporary variables starting with underscores in locals_d
+        if out_file.startswith('_'):
 
-        for table_name, column_names in summary_dict.items():
+            logger.debug(f'Temp Variable: {expr} -> {out_file}')
 
-            df = pipeline.get_table(table_name)
+            locals_d[out_file] = eval(expr, globals(), locals_d)
+            continue
 
-            for c in column_names:
-                n = 100
-                empty = (df[c] == '') | df[c].isnull()
+        logger.debug(f'Summary: {expr} -> {out_file}.csv')
 
-                print(f"\n### {table_name}.{c} type: {df.dtypes[c]} rows: {len(df)} ({empty.sum()} empty)\n\n",
-                      file=output_file)
-                print(df[c].value_counts().nlargest(n), file=output_file)
+        resultset = eval(expr, globals(), locals_d)
+        resultset.to_csv(
+            config.output_file_path(os.path.join(output_location, f'{out_file}.csv')),
+            index=False,
+        )
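
And a hypothetical `summarize.csv` expression file consumed by this loop; only the `Output` and `Expression` columns are read, and an `Output` beginning with an underscore is stored as a temporary local rather than written to disk (the column names used in the expressions are assumptions):

Output,Expression
_trip_counts,trips_merged.groupby('trip_mode').size()
trip_mode_share,_trip_counts.div(_trip_counts.sum()).reset_index(name='share')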
