You should use tf.extract_image_patches.This op collects patches from the input image, as if applying a convolution. It supports window sliding and overlapping.
def imagemakeHistBins(inPut, N, M=None):
if M is None:
M = N
B, H, W = inPut.get_shape().as_list()
HistH = math.ceil(H / N)
HistW = math.ceil(W / M)
inPut = tf.pad(inPut+1,[[0,0],[math.ceil((HistH*N-H)/2),math.floor((HistH*N-H)/2)],[math.ceil((HistW*M-W)/2),math.floor((HistW*M-W)/2)]])
# extract inPut to (-1,N,M,HistH*HistW)
# the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
# the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
# ...
inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
ksizes=[1, HistH, HistW, 1],
strides=[1, HistH, HistW, 1],
rates=[1, 1, 1, 1],
padding='VALID')
# following codes are referenced from https://stackoverflow.com/a/50883668
max_arr_plus = tf.reduce_max(inPut_new) + 1
new_shape = tf.shape(inPut_new)
group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
[new_shape[0], new_shape[1],new_shape[2]])
ids = inPut_new + group[:, :, :, None]
out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
return out[:,:,:,1:]
Experimental comparison
You can compare the time consumption of three methods at the same time.
import tensorflow as tf
import math
import time
import numpy as np
def makeHistBins(inPut, N, M=None):
if M is None:
M = N
LEN = int(inPut.shape[0])
ImgH = int(inPut.shape[1])
ImgW = int(inPut.shape[2])
HistH = math.ceil(ImgH/N)
HistW = math.ceil(ImgW/M)
for k in range(LEN):
for i in range(N):
for j in range(M):
hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
if j == 0:
row = tf.expand_dims(hist, 0)
else:
row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
if i == 0:
col = tf.expand_dims(row, 0)
else:
col = tf.concat([col, tf.expand_dims(row, 0)], 0)
if k == 0:
out = tf.expand_dims(col, 0)
else:
out = tf.concat([out, tf.expand_dims(col, 0)], 0)
return out
def makeHistBins2(inPut, N, M=None):
if M is None:
M = N
LEN = int(inPut.shape[0])
ImgH = int(inPut.shape[1])
ImgW = int(inPut.shape[2])
HistH = math.ceil(ImgH/N)
HistW = math.ceil(ImgW/M)
splits_h = [int(np.floor((ImgH*(i+1))/N)- np.floor((ImgH*i)/N)) for i in range(N)]
splits_w = [int(np.floor((ImgW*(i+1))/M)- np.floor((ImgW*i)/M)) for i in range(M)]
# splits_h = np.array([ImgH // N for _ in range(N)])
# if ImgH - sum(splits_h) > 0:
# splits_h[:(ImgH - sum(splits_h))] += 1
#
# splits_w = np.array([ImgW // M for _ in range(M)])
# if ImgW - sum(splits_w) > 0:
# splits_w[:ImgW - sum(splits_w)] += 1
# print(splits_w)
out = tf.map_fn(
lambda x: tf.stack(
[tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256) for y in tf.split(x, splits_h, axis=0) for z in tf.split(y, splits_w, axis=1) ]
), inPut)
return tf.reshape(out, [-1, len(splits_h), len(splits_w), 256])
def imagemakeHistBins(inPut, N, M=None):
if M is None:
M = N
B, H, W = inPut.get_shape().as_list()
HistH = math.ceil(H / N)
HistW = math.ceil(W / M)
inPut = tf.pad(inPut+1,[[0,0],[0,HistH*N-H],[0,HistW*M-H]])
# extract inPut to (-1,N,M,HistH*HistW)
# the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
# the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
# ...
inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
ksizes=[1, HistH, HistW, 1],
strides=[1, HistH, HistW, 1],
rates=[1, 1, 1, 1],
padding='VALID')
# following codes are referenced from https://stackoverflow.com/a/50883668
max_arr_plus = tf.reduce_max(inPut_new) + 1
new_shape = tf.shape(inPut_new)
group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
[new_shape[0], new_shape[1],new_shape[2]])
ids = inPut_new + group[:, :, :, None]
out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
return out[:,:,:,1:]
Images =tf.placeholder(shape=(10,100,100),dtype=tf.int32)
out1 = makeHistBins(Images, 5)
out2 = makeHistBins2(Images, 5)
out3 = imagemakeHistBins(Images, 5)
with tf.Session() as sess:
example = np.random.randint(0, 256, size=(10, 100, 100))
value1,value2,value3 = sess.run([out1,out2,out3], feed_dict={Images: example})
print(np.all(value1 == value2))
print(np.all(value1 == value3))
time1 = time.clock()
for _ in range(10):
sess.run(out1,feed_dict={Images:example})
time2 = time.clock()
print('makeHistBins cost time(ms) : %.2f' % ((time2 - time1)*1000))
time1 = time.clock()
for _ in range(10):
sess.run(out2, feed_dict={Images: example})
time2 = time.clock()
print('makeHistBins2 cost time(ms) : %.2f' % ((time2 - time1) * 1000))
time1 = time.clock()
for _ in range(10):
sess.run(out3, feed_dict={Images: example})
time2 = time.clock()
print('imagemakeHistBins cost time(ms) : %.2f' % ((time2 - time1) * 1000))
The result:
# True
# True
# makeHistBins cost time(ms) : 1187.79
# makeHistBins2 cost time(ms) : 568.32
# imagemakeHistBins cost time(ms) : 74.57
The operation time of imagemakeHistBins is reduced by an order of magnitude compared with that of imagemakeHistBins and makeHistBins2.
Notes
Please also pay attention to your extraction strategy.
According to your logic, the blocking strategy is as follows:
ImgH = 100
N=6
start_h = [math.floor(ImgH*i/N) for i in range(N)] # [0, 16, 33, 50, 66, 83]
end_h = [math.ceil(ImgH*(i+1)/N) for i in range(N)] # [17, 34, 50, 67, 84, 100]
# [0:17, 16:34, 33:50, 50:67, 66:84, 83:100]
Such a block cannot be supported by both tf.split and tf.extract_image_patches.
splits_h = np.array([ImgH // N for _ in range(N)])
if ImgH - sum(splits_h) > 0:
splits_h[:(ImgH - sum(splits_h))] += 1
# [17 17 17 17 16 16]
So my strategy is to fill the last block with 0. Because I take the result from 1 to MAX(256) + 1 by out[:,:,:,1:], it will not affect the correctness of the final result.
# tf.extract_image_patches
# [17 17 17 17 17 15]
Even solution
def imagemakeHistBins(inPut, N, M=None):
if M is None:
M = N
B, H, W = inPut.get_shape().as_list()
splits_h = np.array([H // N for _ in range(N)])
if H - sum(splits_h) > 0:
splits_h[:(H - sum(splits_h))] += 1
splits_h = [0] + splits_h.tolist()
splits_h = np.cumsum(splits_h)
splits_w = np.array([W // M for _ in range(M)])
if W - sum(splits_w) > 0:
splits_w[:(W - sum(splits_w))] += 1
splits_w = [0] + splits_w.tolist()
splits_w = np.cumsum(splits_w)
# splits_w=[ 0 17 34 51 68 84 100] if inPut is (10,100,100) and M=N=6
print(splits_w)
inPut_new = []
for i in range(N):
for j in range(M):
hist = inPut[:, splits_h[i]:splits_h[i + 1], splits_w[j]:splits_w[j + 1]]
hist = tf.pad(hist+1, [[0, 0],
[0, splits_h[1] - splits_h[i + 1] + splits_h[i]],
[0, splits_w[1] - splits_w[j + 1] + splits_w[j]]])
inPut_new.append(hist)
inPut_new = tf.reshape(tf.transpose(tf.stack(inPut_new), [1, 0, 2, 3]), [-1, N, M, splits_h[1] * splits_w[1]])
# following codes are referenced from https://stackoverflow.com/a/50883668
max_arr_plus = tf.reduce_max(inPut_new) + 1
new_shape = tf.shape(inPut_new)
group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
[new_shape[0], new_shape[1],new_shape[2]])
ids = inPut_new + group[:, :, :, None]
out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
return out[:,:,:,1:]