import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
_, filenames, captions = coco.load_records(train=True)
output: - Data loaded from cache-file: data/coco/records_train.pkl
num_images = len(filenames)
num_images
output: 118287
def load_image(path, size=None):
    img = Image.open(path)
    if size is not None:
        img = img.resize(size=size, resample=Image.LANCZOS)
    return img
def show_image(idx):
    dir = coco.train_dir
    filename = filenames[idx]
    caption = captions[idx]
    path = os.path.join(dir, filename)
    for cap in caption:
        print(cap)
    img = load_image(path)
    plt.imshow(img)
    plt.show()
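For a quick check, the helper can be called with any index into the training set (the index below is just an illustrative choice); it prints the image's captions and then displays the image:
show_image(idx=0)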
image_model = vgg16.VGG16()
image_model.summary()
output: Model: "vgg16"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) [(None, 224, 224, 3)] 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 224, 224, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 224, 224, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 112, 112, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 112, 112, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 112, 112, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 56, 56, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 56, 56, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 56, 56, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 28, 28, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 14, 14, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 7, 7, 512) 0
_________________________________________________________________
flatten (Flatten) (None, 25088) 0
_________________________________________________________________
fc1 (Dense) (None, 4096) 102764544
_________________________________________________________________
fc2 (Dense) (None, 4096) 16781312
_________________________________________________________________
predictions (Dense) (None, 1000) 4097000
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________
transfer_layer = image_model.get_layer('fc2')
image_model_transfer = Model(inputs=image_model.input,
                             outputs=transfer_layer.output)
img_size = K.int_shape(image_model.input)[1:3]
img_size
output: (224, 224)
transfer_values_size = K.int_shape(transfer_layer.output)[1]
transfer_values_size
output: 4096
def print_progress(count, max_count):
    pct_complete = count / max_count
    msg = '\r- Progress: {0:.1%}'.format(pct_complete)
    sys.stdout.write(msg)
    sys.stdout.flush()
def process_images(data_dir, filenames, batch_size=32):
    num_images = len(filenames)

    # Pre-allocate one batch of input images and the full array of transfer-values.
    shape = (batch_size,) + img_size + (3,)
    image_batch = np.zeros(shape=shape, dtype=np.float16)
    shape = (num_images, transfer_values_size)
    transfer_values = np.zeros(shape=shape, dtype=np.float16)

    start_index = 0
    while start_index < num_images:
        print_progress(count=start_index, max_count=num_images)

        end_index = start_index + batch_size
        if end_index > num_images:
            end_index = num_images
        current_batch_size = end_index - start_index

        # Load and resize the images for this batch.
        for i, filename in enumerate(filenames[start_index:end_index]):
            path = os.path.join(data_dir, filename)
            img = load_image(path, size=img_size)
            image_batch[i] = img

        # Run the batch through the transfer-model and store the results.
        transfer_values_batch = image_model_transfer.predict(image_batch[0:current_batch_size])
        transfer_values[start_index:end_index] = transfer_values_batch[0:current_batch_size]

        start_index = end_index

    print()
    return transfer_values
def process_train_images():
    print('Processing {0} images in the training set...'.format(len(filenames)))
    cache_path = os.path.join(coco.data_dir, 'transfer_values_train.pkl')
    transfer_values = cache(cache_path=cache_path,
                            fn=process_images,
                            data_dir=coco.train_dir,
                            filenames=filenames)
    return transfer_values
%%time
transfer_values = process_train_images()
print('Shape:', transfer_values.shape)
output:
Processing 118287 images in the training set...
- Progress: 0.0%
ValueError Traceback (most recent call last)
<timed exec> in <module>
<ipython-input-122-03d9be1e09b0> in process_train_images()
4 cache_path = os.path.join(coco.data_dir, 'transfer_values_train.pkl')
5
----> 6 transfer_values = cache(cache_path=cache_path,
7 fn=process_images,
8 data_dir=coco.train_dir,
~\Desktop\yazılım\doğal dil işleme\6-image captioning\coco.py in cache(cache_path, fn, *args, **kwargs)
239
240 # Call the function / class-init with the supplied arguments.
--> 241 obj = fn(*args, **kwargs)
242
243 # Save the data to a cache-file.
<ipython-input-121-ee644d93b5de> in process_images(data_dir, filenames, batch_size)
22 path = os.path.join(data_dir, filename)
23 img = load_image(path, size=img_size)
---> 24 image_batch[i] = img
25
26 transfer_values_batch = image_model_transfer.predict(image_batch[0:current_batch_size])
ValueError: could not broadcast input array from shape (224,224) into shape (224,224,3)
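The shape mismatch in the error ((224, 224) vs. (224, 224, 3)) suggests that at least one training image is grayscale, so PIL returns a 2-D array that cannot be assigned into a 3-channel batch slot. A minimal sketch of a workaround, assuming load_image above is the only place images are read, is to convert every image to RGB before resizing:
def load_image(path, size=None):
    img = Image.open(path)
    # Some MS COCO images are grayscale; forcing RGB gives every image
    # three channels, so the array always has shape (height, width, 3).
    img = img.convert('RGB')
    if size is not None:
        img = img.resize(size=size, resample=Image.LANCZOS)
    return img
With this change, image_batch[i] = img receives a (224, 224, 3) array for every image, so the broadcast no longer fails.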