group.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. from __future__ import unicode_literals
  2. import calendar
  3. import datetime
  4. from functools import reduce
  5. from itertools import groupby
  6. import logging
  7. from flask_appbuilder._compat import as_unicode
  8. from flask_babel import lazy_gettext as _
  9. from .. import const as c
  10. log = logging.getLogger(__name__)
  11. def aggregate(label=""):
  12. """
  13. Use this decorator to set a label for your aggregation functions on charts.
  14. :param label:
  15. The label to complement with the column
  16. """
  17. def wrap(f):
  18. f._label = label
  19. return f
  20. return wrap
  21. @aggregate(_("Count of"))
  22. def aggregate_count(items, col):
  23. """
  24. Function to use on Group by Charts.
  25. accepts a list and returns the count of the list's items
  26. """
  27. return len(list(items))
  28. @aggregate(_("Sum of"))
  29. def aggregate_sum(items, col):
  30. """
  31. Function to use on Group by Charts.
  32. accepts a list and returns the sum of the list's items
  33. """
  34. return sum(getattr(item, col) for item in items)
  35. @aggregate(_("Avg. of"))
  36. def aggregate_avg(items, col):
  37. """
  38. Function to use on Group by Charts.
  39. accepts a list and returns the average of the list's items
  40. """
  41. try:
  42. return aggregate_sum(items, col) / aggregate_count(items, col)
  43. except Exception:
  44. log.warning(c.LOGMSG_WAR_DBI_AVG_ZERODIV)
  45. return 0.0
  46. class BaseGroupBy(object):
  47. column_name = ""
  48. name = ""
  49. aggregate_func = None
  50. aggregate_col = ""
  51. def __init__(
  52. self, column_name, name, aggregate_func=aggregate_count, aggregate_col=""
  53. ):
  54. """
  55. Constructor.
  56. :param column_name:
  57. Model field name
  58. :param name:
  59. The group by name
  60. """
  61. self.column_name = column_name
  62. self.name = name
  63. self.aggregate_func = aggregate_func
  64. self.aggregate_col = aggregate_col
  65. def apply(self, data):
  66. """
  67. Override this to implement you own new filters
  68. """
  69. pass
  70. def get_group_col(self, item):
  71. return getattr(item, self.column_name)
  72. def get_format_group_col(self, item):
  73. return item
  74. def get_aggregate_col_name(self):
  75. if self.aggregate_col:
  76. return self.aggregate_func.__name__ + "_" + self.aggregate_col
  77. else:
  78. return self.aggregate_func.__name__
  79. def __repr__(self):
  80. return self.name
  81. class GroupByCol(BaseGroupBy):
  82. def _apply(self, data):
  83. data = sorted(data, key=self.get_group_col)
  84. json_data = dict()
  85. json_data["cols"] = [
  86. {"id": self.column_name, "label": self.column_name, "type": "string"},
  87. {
  88. "id": self.aggregate_func.__name__ + "_" + self.column_name,
  89. "label": self.aggregate_func.__name__ + "_" + self.column_name,
  90. "type": "number",
  91. },
  92. ]
  93. json_data["rows"] = []
  94. for grouped, items in groupby(data, self.get_group_col):
  95. aggregate_value = self.aggregate_func(items, self.aggregate_col)
  96. json_data["rows"].append(
  97. {
  98. "c": [
  99. {"v": self.get_format_group_col(grouped)},
  100. {"v": aggregate_value},
  101. ]
  102. }
  103. )
  104. return json_data
  105. def apply(self, data):
  106. data = sorted(data, key=self.get_group_col)
  107. return [
  108. [
  109. self.get_format_group_col(grouped),
  110. self.aggregate_func(items, self.aggregate_col),
  111. ]
  112. for (grouped, items) in groupby(data, self.get_group_col)
  113. ]
  114. class GroupByDateYear(BaseGroupBy):
  115. def apply(self, data):
  116. data = sorted(data, key=self.get_group_col)
  117. return [
  118. [
  119. self.get_format_group_col(grouped),
  120. self.aggregate_func(items, self.aggregate_col),
  121. ]
  122. for (grouped, items) in groupby(data, self.get_group_col)
  123. ]
  124. def get_group_col(self, item):
  125. value = getattr(item, self.column_name)
  126. if value:
  127. return value.year
  128. class GroupByDateMonth(BaseGroupBy):
  129. def apply(self, data):
  130. data = sorted(data, key=self.get_group_col)
  131. return [
  132. [
  133. self.get_format_group_col(grouped),
  134. self.aggregate_func(items, self.aggregate_col),
  135. ]
  136. for (grouped, items) in groupby(data, self.get_group_col)
  137. if grouped
  138. ]
  139. def get_group_col(self, item):
  140. value = getattr(item, self.column_name)
  141. if value:
  142. return value.year, value.month
  143. def get_format_group_col(self, item):
  144. return calendar.month_name[item[1]] + " " + str(item[0])
  145. class BaseProcessData(object):
  146. """
  147. Base class to process data.
  148. It will group data by one or many columns or functions.
  149. The aggregation is made by an already defined function, or by a custom function
  150. :group_bys_cols: A list of columns or functions to group data.
  151. :aggr_by_cols: A list of tuples [(<AGGR FUNC>,'<COLNAME>'),...].
  152. :formatter_by_cols: A dict.
  153. """
  154. group_bys_cols = None
  155. # ['<COLNAME>',<FUNC>, ....]
  156. aggr_by_cols = None
  157. # [(<AGGR FUNC>,'<COLNAME>'),...]
  158. formatter_by_cols = {}
  159. # {<FUNC>: '<COLNAME>',...}
  160. def __init__(self, group_by_cols, aggr_by_cols, formatter_by_cols):
  161. self.group_bys_cols = group_by_cols
  162. self.aggr_by_cols = aggr_by_cols
  163. self.formatter_by_cols = formatter_by_cols
  164. def attrgetter(self, *items):
  165. if len(items) == 1:
  166. attr = items[0]
  167. def g(obj):
  168. return self.resolve_attr(obj, attr)
  169. else:
  170. def g(obj):
  171. return tuple(self.resolve_attr(obj, attr) for attr in items)
  172. return g
  173. def resolve_attr(self, obj, attr):
  174. if not hasattr(obj, attr):
  175. # it's an inner obj attr
  176. return reduce(getattr, attr.split("."), obj)
  177. if hasattr(getattr(obj, attr), "__call__"):
  178. # its a function
  179. return getattr(obj, attr)()
  180. else:
  181. # it's an attribute
  182. return getattr(obj, attr)
  183. def format_columns(self, *values):
  184. if len(values) == 1:
  185. return self.format_column(self.group_bys_cols[0], values[0])
  186. else:
  187. return tuple(
  188. self.format_column(item, value)
  189. for item, value in (self.group_bys_cols, values)
  190. )
  191. def format_column(self, item, value):
  192. if item in self.formatter_by_cols:
  193. return self.formatter_by_cols[item](value)
  194. else:
  195. return value
  196. def apply(self, data):
  197. pass
  198. def to_dict(self, data):
  199. ret = []
  200. for item in data:
  201. row = {}
  202. if not isinstance(item[0], tuple):
  203. row[self.group_bys_cols[0]] = str(item[0])
  204. else:
  205. for group_col_data, i in zip(item[0], enumerate(item[0])):
  206. row[self.group_bys_cols[i]] = str(group_col_data)
  207. for col_data, i in zip(item[1:], enumerate(item[1:])):
  208. log.debug("%s,%s", col_data, i)
  209. key = self.aggr_by_cols[i].__name__ + self.aggr_by_cols[i]
  210. if isinstance(col_data, datetime.date):
  211. row[key] = str(col_data)
  212. else:
  213. row[key] = col_data
  214. ret.append(row)
  215. return ret
  216. def to_json(self, data, labels=None):
  217. """
  218. Will return a dict with Google JSON structure for charts
  219. The Google structure::
  220. {
  221. cols: [{id:<COL_NAME>, label:<LABEL FOR COL>, type: <COL TYPE>}, ...]
  222. rows: [{c: [{v: <COL VALUE}, ...], ... ]
  223. }
  224. :param data:
  225. :param labels: dict with labels to include on Google JSON strcut
  226. :return: dict with Google JSON structure
  227. """
  228. labels = labels or dict()
  229. json_data = dict()
  230. json_data["cols"] = []
  231. # Create Structure to identify the grouped columns
  232. for group_col in self.group_bys_cols:
  233. label = "" or as_unicode(labels[group_col])
  234. json_data["cols"].append(
  235. {"id": group_col, "label": label, "type": "string"}
  236. )
  237. # Create Structure to identify the Aggregated columns
  238. for aggr_col in self.aggr_by_cols:
  239. if isinstance(aggr_col, tuple):
  240. label_key = aggr_col[0].__name__ + aggr_col[1]
  241. aggr_col = aggr_col[1]
  242. else:
  243. label_key = aggr_col
  244. label = "" or as_unicode(labels[label_key])
  245. json_data["cols"].append({"id": aggr_col, "label": label, "type": "number"})
  246. # Create Structure with the data
  247. json_data["rows"] = []
  248. for item in data:
  249. row = {"c": []}
  250. if not isinstance(item[0], tuple):
  251. row["c"].append({"v": "{0}".format(item[0])})
  252. else:
  253. for group_col_data in item[0]:
  254. row["c"].append({"v": "{0}".format(group_col_data)})
  255. for col_data in item[1:]:
  256. if isinstance(col_data, datetime.date):
  257. row["c"].append({"v": "{0}".format(col_data)})
  258. else:
  259. row["c"].append({"v": col_data})
  260. json_data["rows"].append(row)
  261. return json_data
  262. class DirectProcessData(BaseProcessData):
  263. def apply(self, data, sort=True):
  264. group_by = self.group_bys_cols[0]
  265. if sort:
  266. data = sorted(data, key=self.attrgetter(group_by))
  267. result = []
  268. for item in data:
  269. result_item = [self.format_columns(self.attrgetter(group_by)(item))]
  270. for aggr_by_col in self.aggr_by_cols:
  271. result_item.append(self.attrgetter(aggr_by_col)(item))
  272. result.append(result_item)
  273. return result
  274. class GroupByProcessData(BaseProcessData):
  275. """
  276. Groups by data by chosen columns (property group_bys_cols).
  277. :data: A list of objects
  278. :sort: boolean, if true python will sort the data
  279. :return: A List of lists with group column and aggregation
  280. """
  281. def apply(self, data, sort=True):
  282. if sort:
  283. data = sorted(data, key=self.attrgetter(*self.group_bys_cols))
  284. result = []
  285. for grouped, items in groupby(data, key=self.attrgetter(*self.group_bys_cols)):
  286. items = list(items)
  287. result_item = [self.format_columns(grouped)]
  288. for aggr_by_col in self.aggr_by_cols:
  289. result_item.append(aggr_by_col[0](items, aggr_by_col[1]))
  290. result.append(result_item)
  291. return result