왜냐... 몽고DB에서 json데이터를 집어 넣는데...
이때의 필드명이 map의 key값과 매칭되는데
다른 이름으로 쓰고 싶다는 것이다. 흠흠
근데 내가 어제 구현한 코드에서는 그룹핑된 hash의 key값이 고정적이라는 것이다.
예를 들어, "sum(field)" => "value" 의 sum(field) 같이 말이지.
그래서 sql의 as 같은 닉네임 기능을 추가하고,
손보는 김에 where절을 블록으로 구현해서 쓰도록 했다..
뭔소린지 모르겠으니, sql문과 비교해보면 이런느낌...
sql문 | 내가 만든 코드와 비교하면 |
select max(size) as max_size, max(size) , sum(size) , count(key) , min(size) from table where filedate.substr(1,4) = '2011' group by filedate.substr(1,4) |
# count, sum, max, min, first 라는 기능은 구현해놨다. table = { "size" => "20" ... 생략 ... } obj = GroupHash.new obj.set_field("max(size) as max_size, max(size),sum(size),count(key), min(size)") obj.add_all( table ) obj.where do |map| map["filedate"].slice(0,4) == "2011" end result = obj.group { |map| map["filedate"].slice(0,4) } puts result |
1. 소스코드
# encoding: utf-8
########################################
# Hash데이터를 group by하기
#
# 만든이 : 정민철 (deajang@gmail.com)
#
# History (ver 0.1.1)
# 2012.03.11 : where 기능구현
# 2012.03.11 : as field 기능구현 (닉네임)
# 2012.03.10 : group by 기능 구현
########################################
# Groups an in-memory list of Hash rows in the spirit of SQL's
# "SELECT <aggregates> FROM rows WHERE <block> GROUP BY <block>".
#
# Typical usage:
#   obj = GroupHash.new
#   obj.add_all(rows)
#   obj.set_field("sum(size) as total, max(size)")
#   obj.where { |row| row["size"] > 0 }
#   result = obj.group { |row| row["date"] }
#   # => { date => { "total" => ..., "max(size)" => ... }, ... }
class GroupHash
  # Aggregate functions a field query may name. The function name comes
  # straight from the query string, so dispatch is limited to this list
  # instead of calling whatever method happens to match. Add the name here
  # when a new aggregate method is implemented below.
  AGGREGATE_FUNCTIONS = %w[count sum max min first].freeze

  # Default row filter: accepts every row.
  ACCEPT_ALL = lambda { |_row| true }

  # Matches "func(field)" at the start of a token; captures func and field.
  FIELD_PATTERN = /\A\s*([a-zA-Z_]+)[(]([a-zA-Z_]+[a-zA-Z0-9_-]*)[)]*/

  # Matches "func(field) as nickname"; captures the nickname.
  NICKNAME_PATTERN =
    /\A\s*[a-zA-Z_]+[(][a-zA-Z_]+[a-zA-Z0-9_-]*[)]\s+as\s+([a-zA-Z_][a-zA-Z0-9_]*)/

  def initialize
    @source = []            # rows added through add / add_all
    @group_by_result = nil  # result of the most recent group call
    @select_field = {}      # column name => list of aggregate function names
    @nickname_field = {}    # "func(column)" => output key chosen with "as"
    @where_lambda = ACCEPT_ALL
  end

  # Installs the row filter used by #group. The block receives a row and
  # the row is kept when the block returns a truthy value. Calling it
  # without a block restores the accept-all filter.
  def where(&where_lambda)
    @where_lambda = where_lambda || ACCEPT_ALL
  end

  # Reads the stored rows and aggregates them per group.
  #
  # The block receives a row and returns its group key. It is evaluated
  # once per row, and only for rows that pass the where filter and contain
  # at least one selected column.
  #
  # Returns a Hash of group key => { output field name => aggregated value }.
  def group(&groupKey)
    @group_by_result = {}
    @source.each do |row|
      next unless @where_lambda.call(row)

      selected = row.select { |col_name, _value| @select_field.key?(col_name) }
      next if selected.empty?

      key = groupKey.call(row)
      selected.each do |col_name, col_value|
        @select_field[col_name].each do |func|
          send(func, key, col_name, col_value, _make_group_field_name(func, col_name))
        end
      end
    end
    @group_by_result
  end

  # Adds a single row (a Hash of column name => value).
  def add(map)
    @source << map
  end

  # Adds every row of the given list.
  def add_all(mapList)
    mapList.each { |map| add(map) }
  end

  # Declares which aggregates to compute, e.g.
  #   "count(comment) as comment_count, sum(filesize), first(name)"
  #
  # Repeated calls accumulate. When a token cannot be parsed, names an
  # unknown function, or repeats a "func(column)" that already has a
  # nickname, a message is printed and the whole selection is cleared, so a
  # following #group returns an empty Hash instead of failing.
  #
  # Returns true when the whole query was accepted, false otherwise.
  def set_field(field_query)
    field_query.split(/\s*,\s*/).each do |field|
      field_func = _get_function_name(field)
      field_name = _get_field_name(field)

      if field_func.nil? || field_name.nil? || !AGGREGATE_FUNCTIONS.include?(field_func)
        puts "ParseError: unknown field query. (unexpected = \"#{field}\")"
        return _reset_selection
      end

      field_key = "#{field_func}(#{field_name})"
      if @nickname_field.include?(field_key)
        puts "Warring: exist nick name. (unexpected = '#{field_key}'"
        return _reset_selection
      end

      nick = _get_nick_name(field)
      @nickname_field[field_key] = nick unless nick.nil?
      _map_in_append!(@select_field, field_name, field_func)
    end
    true
  end

  private

  # Drops every selected field and nickname; returns false so that
  # set_field can use it as its failure result.
  def _reset_selection
    @select_field = {}
    @nickname_field = {}
    false
  end

  # Appends value to the list stored at map[key], creating the list when
  # needed. A value already in the list is not added again, so a repeated
  # "sum(x)" is not accumulated twice per row.
  def _map_in_append!(map, key, value)
    return false if map.nil?

    list = (map[key] ||= [])
    list << value unless list.include?(value)
    true
  end

  # ex) "sum(fieldname) as nickname" #=> "sum"   (nil when not parsable)
  def _get_function_name(token)
    match = FIELD_PATTERN.match(token)
    match && match[1]
  end

  # ex) "sum(fieldname) as nickname" #=> "fieldname"   (nil when not parsable)
  def _get_field_name(token)
    match = FIELD_PATTERN.match(token)
    match && match[2]
  end

  # ex) "sum(fieldname) as nickname" #=> "nickname"   (nil when there is no "as")
  def _get_nick_name(token)
    match = NICKNAME_PATTERN.match(token)
    match && match[1]
  end

  # Returns the value stored for the group / output field, or nil when
  # nothing has been stored yet.
  def _find_group_value(group_key, group_field_name)
    bucket = @group_by_result[group_key]
    bucket && bucket[group_field_name]
  end

  # Stores value for the group / output field, creating the group's
  # bucket on first use.
  def _save_group_value(group_key, value, group_field_name)
    (@group_by_result[group_key] ||= {})[group_field_name] = value
  end

  # Output key for an aggregate: the nickname when one was declared with
  # "as", otherwise "func(column)".
  def _make_group_field_name(method_name, col_name)
    default_name = "#{method_name}(#{col_name})"
    @nickname_field.fetch(default_name, default_name)
  end

  ########################################################
  # Built-in aggregate functions
  #
  # Every aggregate takes the same four arguments:
  #   group_key        : key of the group the current row belongs to
  #   field_name       : name of the column being read
  #   value            : value of that column in the current row
  #   group_field_name : key under which the aggregate is stored in the result
  #
  # @see
  #   _find_group_value(group_key, group_field_name)        : read the stored value
  #   _save_group_value(group_key, value, group_field_name) : store a value
  ########################################################

  # Sum. nil values are skipped; both operands are converted with to_i.
  def sum(group_key, field_name, value, group_field_name)
    return if value.nil?

    previous = _find_group_value(group_key, group_field_name)
    _save_group_value(group_key, previous.to_i + value.to_i, group_field_name)
  end

  # Count. Increments once per row that contains the column; a nil value
  # is counted as well.
  def count(group_key, field_name, value, group_field_name)
    previous = _find_group_value(group_key, group_field_name)
    _save_group_value(group_key, previous.to_i + 1, group_field_name)
  end

  # Maximum. nil values are skipped.
  def max(group_key, field_name, value, group_field_name)
    return if value.nil?

    previous = _find_group_value(group_key, group_field_name)
    largest = previous.nil? || previous < value ? value : previous
    _save_group_value(group_key, largest, group_field_name)
  end

  # Minimum. nil values are skipped.
  def min(group_key, field_name, value, group_field_name)
    return if value.nil?

    previous = _find_group_value(group_key, group_field_name)
    smallest = previous.nil? || previous > value ? value : previous
    _save_group_value(group_key, smallest, group_field_name)
  end

  # First value. Stores the value only while the stored one is still nil,
  # so a leading nil is replaced by the next value seen.
  def first(group_key, field_name, value, group_field_name)
    previous = _find_group_value(group_key, group_field_name)
    _save_group_value(group_key, value, group_field_name) if previous.nil?
  end
end
2. 테스트 샘플코드
###############
## Sample usage of GroupHash
###############
sample_rows = [
  {"name"=>"minchul-1.txt", "basetime"=>"20120101", "filesize"=>100, "comment"=>nil},
  {"name"=>"minchul-java.txt", "basetime"=>"20120101", "filesize"=>32, "comment"=>nil},
  {"name"=>"junghoon-java.txt", "basetime"=>"20120101", "filesize"=>232, "comment"=>nil},
  {"name"=>"jihyun-c.txt", "basetime"=>"20120102", "filesize"=>433, "comment"=>"블라블라"},
  {"name"=>"asdf.jpg", "basetime"=>"20120102", "filesize"=>12, "comment"=>nil},
  {"name"=>"dddddd.jpg", "basetime"=>"20120102", "filesize"=>34, "comment"=>nil},
  {"name"=>"test.jpg", "basetime"=>"20120102", "filesize"=>555, "comment"=>nil},
  {"name"=>"junghoon-python.pdf","basetime"=>"20110101", "filesize"=>3300, "comment"=>nil},
  {"name"=>"jihyun-php.txt","basetime"=>"20110301", "filesize"=>1, "comment"=>"뭐지이건"}
]

# Prints a section title followed by one "key : value" line per group.
print_groups = lambda do |title, groups|
  puts title
  groups.each { |key, val| puts "#{key} : #{val}" }
end

obj = GroupHash.new
obj.add_all(sample_rows)

# Aggregates to compute; the first two get a nickname through "as".
obj.set_field("count(comment) as comment_count, sum(filesize) as filesize_sum, max(filesize), min(filesize), first(name)")

# 1] Group by the full date.
result = obj.group { |map| map["basetime"] }
print_groups.call("1]---------------", result)

# 2] Keep only the rows from 2011, then group by year.
obj.where { |map| map["basetime"].slice(0,4) == "2011" }
result = obj.group { |map| map["basetime"].slice(0,4) }
print_groups.call("2]---------------", result)
3. 실행결과
1]---------------
20120101 : {"first(name)"=>"minchul-1.txt", "filesize_sum"=>364, "max(filesize)"=>232, "min(filesize)"=>32, "comment_count"=>3}
20120102 : {"first(name)"=>"jihyun-c.txt", "filesize_sum"=>1034, "max(filesize)"=>555, "min(filesize)"=>12, "comment_count"=>4}
20110101 : {"first(name)"=>"junghoon-python.pdf", "filesize_sum"=>3300, "max(filesize)"=>3300, "min(filesize)"=>3300, "comment_count"=>1}
20110301 : {"first(name)"=>"jihyun-php.txt", "filesize_sum"=>1, "max(filesize)"=>1, "min(filesize)"=>1, "comment_count"=>1}
2]---------------
2011 : {"first(name)"=>"junghoon-python.pdf", "filesize_sum"=>3301, "max(filesize)"=>3300, "min(filesize)"=>1, "comment_count"=>2}