# encoding: utf-8
########################################
# Hash데이터를 group by하기
#
# 만든이 : 정민철 (deajang 골뱅이 gmail.com)
# 2012.03.10 20:01
########################################
# Groups an array of Hash rows by a caller-supplied key and computes
# aggregate values per group, in the spirit of SQL
# "SELECT func(field), ... GROUP BY key".
#
# Usage:
#   g = GroupHash.new
#   g.addAll(rows)                        # rows: Array of Hash
#   g.set_field("sum(size), first(name)") # which aggregates to compute
#   g.group { |row| row["date"] }         # => { key => { "sum(size)" => ..., ... } }
class GroupHash
  # Shape of one query token: func(fieldName), optionally surrounded by
  # whitespace. Anchored with \A/\z so the whole token must match.
  FIELD_QUERY_PATTERN = /\A\s*([a-zA-Z_]+)[(]([a-zA-Z_]+[a-zA-Z0-9_-]*)[)]\s*\z/

  def initialize
    @source = []            # rows added through add / addAll
    @group_by_result = nil  # result of the most recent group call
    @select_field = {}      # field name => list of aggregate function names
  end

  # Reads the stored rows and aggregates them per group.
  #
  # The block receives a row and returns that row's group key. It is called
  # once per row, and only for rows that contain at least one selected field.
  #
  # @return [Hash] group key => { "func(field)" => aggregated value }
  def group(&groupKey)
    @group_by_result = {} # start from a clean result on every call
    @source.each do |row|
      selected = row.select { |col_name, _| @select_field.key?(col_name) }
      next if selected.empty?

      key = groupKey.call(row)
      selected.each do |col_name, col_value|
        @select_field[col_name].each do |function_name|
          # Function names were validated in set_field, so this can only
          # reach an aggregate function.
          send(function_name, key, col_name, col_value, "#{function_name}(#{col_name})")
        end
      end
    end
    @group_by_result
  end

  # Adds a single row.
  def add(map)
    @source << map
  end

  # Adds every row of the given list.
  def addAll(mapList)
    mapList.each { |map| add(map) }
  end

  # Defines which fields are extracted and with which aggregate functions.
  #
  # field_query is a comma separated list of "func(field)" tokens, e.g.
  # "count(comment), sum(filesize)". Successive calls accumulate; a
  # function/field pair that is already registered is not added twice, so a
  # repeated token cannot make sum or count run twice per row.
  #
  # If any token is malformed or names something that is not an aggregate
  # function, a ParseError line is printed, the whole selection is cleared
  # (group then returns an empty Hash) and false is returned.
  #
  # @return [Boolean] true when every token was accepted
  def set_field(field_query)
    accepted = []
    field_query.split(/\s*,\s*/).each do |field|
      field_func = _get_function_name(field)
      field_name = _get_field_name(field)
      if field_func.nil? || field_name.nil? || !_aggregate_function?(field_func)
        puts "ParseError: unknown field query. (unexpected = \"#{field}\")"
        # Keep the object usable: an empty selection instead of nil.
        @select_field = {}
        return false
      end
      accepted << [field_name, field_func]
    end

    accepted.each do |field_name, field_func|
      next if @select_field.fetch(field_name, []).include?(field_func)

      _map_in_append!(@select_field, field_name, field_func)
    end
    true
  end

  private

  # map[key] << value, creating the list on first use.
  # Returns false (and does nothing) when map is nil, true otherwise.
  def _map_in_append!(map, key, value)
    return false if map.nil?

    (map[key] ||= []) << value
    true
  end

  # "func(fieldName)" #=> "func" (nil when the token is malformed)
  def _get_function_name(token)
    token[FIELD_QUERY_PATTERN, 1]
  end

  # "func(fieldName)" #=> "fieldName" (nil when the token is malformed)
  def _get_field_name(token)
    token[FIELD_QUERY_PATTERN, 2]
  end

  # True when name refers to an aggregate function: a method defined by
  # GroupHash (or a subclass) that takes exactly the four aggregate
  # parameters. This keeps inherited methods such as send out of reach of
  # the query string.
  def _aggregate_function?(name)
    return false unless respond_to?(name, true)

    candidate = method(name)
    candidate.arity == 4 && (candidate.owner <= GroupHash) == true
  end

  # Returns the value stored for (group_key, save_field_name), or nil when
  # nothing has been stored yet.
  def _selet_group_value(group_key, save_field_name)
    bucket = @group_by_result[group_key]
    return nil if bucket.nil?

    bucket[save_field_name]
  end

  # Stores value for (group_key, save_field_name), creating the group's
  # Hash on first use.
  def _save_group_value(group_key, value, save_field_name)
    (@group_by_result[group_key] ||= {})[save_field_name] = value
  end

  # Numeric values are used as they are (so Floats keep their fraction);
  # anything else is converted with to_i.
  def _to_number(value)
    value.is_a?(Numeric) ? value : value.to_i
  end

  ########################################################
  # Built-in aggregate functions
  #
  # Every aggregate function takes the same four arguments:
  #   group_key       : key of the group the current row belongs to
  #   field_name      : name of the field that was just read
  #   value           : value of that field in the current row
  #   save_field_name : "func(field)" label, used as the key inside the
  #                     group's result Hash
  #
  # Helpers:
  #   _selet_group_value(group_key, save_field_name)        : read the stored value
  #   _save_group_value(group_key, value, save_field_name)  : store a new value
  #
  # sum, count, max and min ignore nil values. A group in which a field is
  # always nil therefore gets no entry for those functions at all.
  ########################################################

  # Sum of the non-nil values.
  def sum(group_key, _field_name, value, save_field_name)
    return if value.nil?

    previous = _selet_group_value(group_key, save_field_name) || 0
    _save_group_value(group_key, previous + _to_number(value), save_field_name)
  end

  # Number of non-nil values.
  def count(group_key, _field_name, value, save_field_name)
    return if value.nil?

    previous = _selet_group_value(group_key, save_field_name).to_i
    _save_group_value(group_key, previous + 1, save_field_name)
  end

  # Largest non-nil value (compared with <).
  def max(group_key, _field_name, value, save_field_name)
    return if value.nil?

    previous = _selet_group_value(group_key, save_field_name)
    largest = previous.nil? || previous < value ? value : previous
    _save_group_value(group_key, largest, save_field_name)
  end

  # Smallest non-nil value (compared with >).
  def min(group_key, _field_name, value, save_field_name)
    return if value.nil?

    previous = _selet_group_value(group_key, save_field_name)
    smallest = previous.nil? || previous > value ? value : previous
    _save_group_value(group_key, smallest, save_field_name)
  end

  # First value seen for the group. A stored nil cannot be told apart from
  # "nothing stored", so leading nil values are replaced by the first
  # non-nil value that follows; if every value is nil the entry stays nil.
  def first(group_key, _field_name, value, save_field_name)
    return unless _selet_group_value(group_key, save_field_name).nil?

    _save_group_value(group_key, value, save_field_name)
  end
end
# [테스트용 코드] (test code)
###############
## Demo script that exercises GroupHash
###############
sample_rows = [
{"name"=>"minchul-1.txt", "basetime"=>"20120101", "filesize"=>100, "comment"=>nil},
{"name"=>"minchul-java.txt", "basetime"=>"20120101", "filesize"=>32, "comment"=>nil},
{"name"=>"junghoon-java.txt", "basetime"=>"20120101", "filesize"=>232, "comment"=>nil},
{"name"=>"jihyun-c.txt", "basetime"=>"20120102", "filesize"=>433, "comment"=>"블라블라"},
{"name"=>"asdf.jpg", "basetime"=>"20120102", "filesize"=>12, "comment"=>nil},
{"name"=>"dddddd.jpg", "basetime"=>"20120102", "filesize"=>34, "comment"=>nil},
{"name"=>"test.jpg", "basetime"=>"20120102", "filesize"=>555, "comment"=>nil},
{"name"=>"junghoon-python.pdf","basetime"=>"20110101", "filesize"=>3300, "comment"=>nil},
{"name"=>"jihyun-php.txt","basetime"=>"20110301", "filesize"=>1, "comment"=>"뭐지이건"}
]

# Prints one "key : aggregates" line per group.
print_groups = lambda do |groups|
  groups.each { |key, val| puts "#{key} : #{val}" }
end

obj = GroupHash.new
obj.addAll(sample_rows)
obj.set_field("count(comment), sum(filesize), max(filesize), min(filesize), first(name)")

# Group by the full basetime value (one group per date).
result = obj.group { |map| map["basetime"] }
puts "1]---------------"
print_groups.call(result)

# Group by the first four characters of basetime (one group per year).
result = obj.group { |map| map["basetime"][0, 4] }
puts "2]---------------"
print_groups.call(result)
# [실행결과] (output of the script above)
# 1]---------------
# 20120101 : {"first(name)"=>"minchul-1.txt", "sum(filesize)"=>364, "max(filesize)"=>232, "min(filesize)"=>32}
# 20120102 : {"first(name)"=>"jihyun-c.txt", "sum(filesize)"=>1034, "max(filesize)"=>555, "min(filesize)"=>12, "count(comment)"=>1}
# 20110101 : {"first(name)"=>"junghoon-python.pdf", "sum(filesize)"=>3300, "max(filesize)"=>3300, "min(filesize)"=>3300}
# 20110301 : {"first(name)"=>"jihyun-php.txt", "sum(filesize)"=>1, "max(filesize)"=>1, "min(filesize)"=>1, "count(comment)"=>1}
# 2]---------------
# 2012 : {"first(name)"=>"minchul-1.txt", "sum(filesize)"=>1398, "max(filesize)"=>555, "min(filesize)"=>12, "count(comment)"=>1}
# 2011 : {"first(name)"=>"junghoon-python.pdf", "sum(filesize)"=>3301, "max(filesize)"=>3300, "min(filesize)"=>1, "count(comment)"=>1}