PHP实现相似图片检测
相似图片检测方法
使用感知哈希(pHash)
感知哈希类算法为每张图片计算一个紧凑的指纹(哈希值),再通过比较两个指纹之间的汉明距离来衡量图片的相似度:距离越小,图片越相似。常用方法包括平均哈希(aHash)、差异哈希(dHash)和基于离散余弦变换(DCT)的感知哈希(pHash)。以下是使用PHP的GD库实现pHash的示例:

/**
 * Computes a 64-bit perceptual hash (pHash) of an image using the GD extension.
 *
 * Steps: scale the image to 32x32, convert each pixel to luminance, take a
 * 2-D DCT, keep the 8x8 block of lowest-frequency coefficients, and emit one
 * bit per coefficient depending on whether it exceeds the mean of that block.
 * The DC coefficient ([0][0]) is left out of the mean because it is far
 * larger than the others and would otherwise dominate the threshold.
 *
 * @param string $imagePath Path (or stream URL) of an image GD can decode.
 * @return string 64 characters, each '0' or '1'; the horizontal-frequency
 *                index is the outer loop, the vertical-frequency index the inner.
 * @throws RuntimeException If the image cannot be read, decoded or resized.
 */
function calculatePHash($imagePath) {
    $size = 32;
    $smallerSize = 8;

    $data = file_get_contents($imagePath);
    if ($data === false || $data === '') {
        throw new RuntimeException("Unable to read image file: {$imagePath}");
    }
    $image = imagecreatefromstring($data);
    if ($image === false) {
        throw new RuntimeException("Unsupported or corrupt image data: {$imagePath}");
    }

    // Take the dimensions from the decoded image rather than re-reading the file.
    $width = imagesx($image);
    $height = imagesy($image);

    $resized = imagecreatetruecolor($size, $size);
    if ($resized === false) {
        imagedestroy($image);
        throw new RuntimeException('Unable to allocate the working image');
    }
    imagecopyresampled($resized, $image, 0, 0, 0, 0, $size, $size, $width, $height);
    imagedestroy($image);

    // Luminance matrix indexed as [$y][$x], using the ITU-R BT.601 weights.
    $grayscale = [];
    for ($y = 0; $y < $size; $y++) {
        for ($x = 0; $x < $size; $x++) {
            $rgb = imagecolorat($resized, $x, $y);
            $r = ($rgb >> 16) & 0xFF;
            $g = ($rgb >> 8) & 0xFF;
            $b = $rgb & 0xFF;
            $grayscale[$y][$x] = 0.299 * $r + 0.587 * $g + 0.114 * $b;
        }
    }
    imagedestroy($resized);

    // Cosine basis values are loop-invariant: compute them once.
    $cosTable = [];
    for ($k = 0; $k < $smallerSize; $k++) {
        for ($n = 0; $n < $size; $n++) {
            $cosTable[$k][$n] = cos((2 * $n + 1) * $k * M_PI / (2 * $size));
        }
    }

    // The 2-D DCT is separable: transform along x first, then along y.
    $rowPass = [];
    for ($u = 0; $u < $smallerSize; $u++) {
        for ($y = 0; $y < $size; $y++) {
            $sum = 0.0;
            for ($x = 0; $x < $size; $x++) {
                $sum += $grayscale[$y][$x] * $cosTable[$u][$x];
            }
            $rowPass[$u][$y] = $sum;
        }
    }

    $dct = [];
    $acTotal = 0.0;
    for ($u = 0; $u < $smallerSize; $u++) {
        for ($v = 0; $v < $smallerSize; $v++) {
            $sum = 0.0;
            for ($y = 0; $y < $size; $y++) {
                $sum += $rowPass[$u][$y] * $cosTable[$v][$y];
            }
            $dct[$u][$v] = $sum;
            if ($u !== 0 || $v !== 0) {
                $acTotal += $sum;
            }
        }
    }

    // Threshold: mean of the 63 AC coefficients (DC term excluded).
    $avg = $acTotal / ($smallerSize * $smallerSize - 1);

    $hash = '';
    for ($u = 0; $u < $smallerSize; $u++) {
        for ($v = 0; $v < $smallerSize; $v++) {
            $hash .= ($dct[$u][$v] > $avg) ? '1' : '0';
        }
    }
    return $hash;
}
/**
 * Counts the positions at which two hash strings differ.
 *
 * Only the overlapping prefix is compared: when the inputs have different
 * lengths, the extra trailing characters of the longer one are ignored.
 *
 * @param string $hash1 First hash string.
 * @param string $hash2 Second hash string.
 * @return int Number of differing positions within the common length.
 */
function hammingDistance($hash1, $hash2) {
    $commonLength = min(strlen($hash1), strlen($hash2));
    $mismatches = 0;
    $pos = 0;
    while ($pos < $commonLength) {
        $mismatches += ($hash1[$pos] != $hash2[$pos]) ? 1 : 0;
        $pos++;
    }
    return $mismatches;
}
使用颜色直方图比较
颜色直方图通过统计图片中颜色分布来比较相似度:

/**
 * Compares two images by the intersection of their normalised RGB histograms.
 *
 * Each histogram is obtained from calculateHistogram(); the score is the sum
 * over all bins of the smaller of the two bin values.
 *
 * @param string $imagePath1 Path of the first image.
 * @param string $imagePath2 Path of the second image.
 * @param int    $bins       Number of bins per colour channel (default 8,
 *                           i.e. 8*8*8 = 512 bins in total).
 * @return int|float Sum of per-bin minima; higher means more similar.
 * @throws InvalidArgumentException If $bins is smaller than 1.
 */
function compareHistograms($imagePath1, $imagePath2, $bins = 8) {
    if ($bins < 1) {
        throw new InvalidArgumentException('$bins must be a positive integer');
    }

    $hist1 = calculateHistogram($imagePath1, $bins);
    $hist2 = calculateHistogram($imagePath2, $bins);

    // Histogram intersection: pair the bins up and keep the smaller value of each pair.
    return array_sum(array_map('min', $hist1, $hist2));
}
/**
 * Builds a normalised 3-D RGB colour histogram for an image.
 *
 * Every channel is quantised into $bins buckets; a pixel falls into the flat
 * index rBin * bins^2 + gBin * bins + bBin. Counts are divided by the number
 * of pixels before being returned.
 *
 * @param string $imagePath Path (or stream URL) of an image GD can decode.
 * @param int    $bins      Number of buckets per colour channel (>= 1).
 * @return array Flat list of bins^3 values, each count divided by the pixel total.
 * @throws InvalidArgumentException If $bins is smaller than 1.
 * @throws RuntimeException If the image cannot be read or decoded.
 */
function calculateHistogram($imagePath, $bins) {
    $bins = (int) $bins;
    if ($bins < 1) {
        throw new InvalidArgumentException('$bins must be a positive integer');
    }

    $data = file_get_contents($imagePath);
    if ($data === false || $data === '') {
        throw new RuntimeException("Unable to read image file: {$imagePath}");
    }
    $image = imagecreatefromstring($data);
    if ($image === false) {
        throw new RuntimeException("Unsupported or corrupt image data: {$imagePath}");
    }

    // For palette images imagecolorat() returns a palette index, not packed
    // RGB, so convert to truecolor before unpacking channels with bit shifts.
    if (!imageistruecolor($image)) {
        imagepalettetotruecolor($image);
    }

    $width = imagesx($image);
    $height = imagesy($image);
    $histogram = array_fill(0, $bins * $bins * $bins, 0);

    for ($x = 0; $x < $width; $x++) {
        for ($y = 0; $y < $height; $y++) {
            $rgb = imagecolorat($image, $x, $y);
            $r = ($rgb >> 16) & 0xFF;
            $g = ($rgb >> 8) & 0xFF;
            $b = $rgb & 0xFF;

            // Integer product divided by 256 is exact, so a value can never
            // be pushed into the wrong bucket by floating-point rounding.
            $rBin = (int) (($r * $bins) / 256);
            $gBin = (int) (($g * $bins) / 256);
            $bBin = (int) (($b * $bins) / 256);

            $histogram[$rBin * $bins * $bins + $gBin * $bins + $bBin]++;
        }
    }
    imagedestroy($image);

    $total = $width * $height;
    return array_map(function ($count) use ($total) {
        return $count / $total;
    }, $histogram);
}
使用OpenCV扩展
PHP可通过OpenCV扩展实现更高级的相似度检测:
// Feature-based similarity using ORB descriptors and a brute-force matcher.
// NOTE(review): the calls below are kept exactly as written; the precise
// signatures depend on the installed OpenCV binding for PHP — confirm them.
$image1 = cv\imread('image1.jpg', cv\IMREAD_COLOR);
$image2 = cv\imread('image2.jpg', cv\IMREAD_COLOR);
$orb = cv\ORB::create();

// Output containers handed to detectAndCompute() for it to fill in.
$keypoints1 = new cv\KeyPoint();
$keypoints2 = new cv\KeyPoint();
$descriptors1 = new cv\Mat();
$descriptors2 = new cv\Mat();
$orb->detectAndCompute($image1, null, $keypoints1, $descriptors1);
$orb->detectAndCompute($image2, null, $keypoints2, $descriptors2);

// Hamming norm suits binary descriptors; the second argument presumably
// enables cross-checking as in OpenCV's BFMatcher — TODO confirm.
$matcher = cv\BFMatcher::create(cv\NORM_HAMMING, true);
$matches = new cv\DMatch();
$matcher->match($descriptors1, $descriptors2, $matches);

$matchCount = count($matches);
if ($matchCount === 0) {
    // No matches at all: report zero similarity instead of dividing by zero.
    $similarity = 0.0;
} else {
    $totalDistance = 0;
    foreach ($matches as $match) {
        $totalDistance += $match->distance;
    }
    // Mean match distance scaled by 256 (presumably the ORB descriptor length
    // in bits, i.e. the maximum Hamming distance) and inverted so that a
    // larger value means more similar.
    $similarity = 1 - ($totalDistance / $matchCount / 256);
}
使用机器学习服务
对于大规模应用,可考虑集成云端API:
/**
 * Sends two images to a remote similarity service and returns its score.
 *
 * Both files are base64-encoded and posted as a JSON body with the keys
 * "image1" and "image2"; the "similarity" field of the JSON response is
 * returned as-is.
 * NOTE(review): the endpoint URL looks like a placeholder — confirm the real
 * service address and authentication before using this.
 *
 * @param string $image1 Path of the first image file.
 * @param string $image2 Path of the second image file.
 * @return mixed Value of the "similarity" field in the decoded response.
 * @throws RuntimeException If either image file cannot be read.
 * @throws UnexpectedValueException If the response is not JSON containing "similarity".
 */
function compareWithCloudAPI($image1, $image2) {
    // Read both files up front so a bad path fails before any network call.
    $payload = [];
    foreach (['image1' => $image1, 'image2' => $image2] as $field => $path) {
        $bytes = file_get_contents($path);
        if ($bytes === false) {
            throw new RuntimeException("Unable to read image file: {$path}");
        }
        $payload[$field] = base64_encode($bytes);
    }

    $client = new \GuzzleHttp\Client();
    $response = $client->post('https://api.cloudvision.com/v1/similarity', [
        'json' => $payload,
    ]);

    // The body is a stream object; cast it explicitly before decoding.
    $decoded = json_decode((string) $response->getBody(), true);
    if (!is_array($decoded) || !array_key_exists('similarity', $decoded)) {
        throw new UnexpectedValueException('Similarity API response did not contain a "similarity" field');
    }
    return $decoded['similarity'];
}
性能优化建议
- 预处理哈希值并存入数据库
- 对大型图集使用KD树或LSH索引
- 考虑使用Redis缓存比较结果
- 对批量处理使用队列系统
注意事项
- 光照变化会影响颜色直方图方法
- 几何变换会影响特征点匹配
- pHash对内容变化敏感但对格式变化鲁棒
- 商业应用需考虑版权问题






