SOP.wtf/user/EmojiDetector.php

204 lines
4.6 KiB
PHP

<?php
namespace SteppingHat\EmojiDetector;
use Exception;
use SteppingHat\EmojiDetector\Model\EmojiInfo;
/**
 * Finds emoji inside a string by searching for every entry of a raw emoji
 * list, and annotates each hit with its hex code points, its name data from
 * the emoji map and any skin tone modifier it carries.
 */
class EmojiDetector {

    /**
     * Maximum length, in code points, that isSingleEmoji() accepts before it
     * rejects the input without scanning it.
     *
     * NOTE(review): some recent multi-person ZWJ sequences with skin tones
     * are longer than 8 code points - confirm this bound against raw.json.
     */
    const LONGEST_EMOJI = 8;

    /**
     * Skin tone modifier code points (upper-case hex) mapped to the label
     * that is stored on the EmojiInfo via setSkinTone().
     */
    const SKIN_TONES = [
        '1F3FB' => 'skin-tone-2',
        '1F3FC' => 'skin-tone-3',
        '1F3FD' => 'skin-tone-4',
        '1F3FE' => 'skin-tone-5',
        '1F3FF' => 'skin-tone-6'
    ];

    /** @var array Decoded map.json: hex string => ['name', 'shortName', 'category'] */
    private $map;

    /** @var array Decoded raw.json: the emoji strings searched for (despite the name, not regular expressions) */
    private $regex;

    /** @var string Directory that holds map.json and raw.json */
    private $dataDir;

    /**
     * EmojiDetector constructor.
     *
     * Loads both data files from the "var" directory next to this file's
     * parent directory.
     *
     * @throws Exception when a data file is missing, unreadable or not valid JSON
     */
    public function __construct() {
        $this->dataDir = __DIR__ . '/../var';
        $this->loadMap();
        $this->loadRawEmojis();
    }

    /**
     * Detects every emoji occurrence in the given string.
     *
     * All occurrences are reported (not only the first one of each emoji),
     * ordered by byte offset. Where candidates overlap, the longest emoji
     * starting at the earliest offset wins and the ones it covers are dropped.
     *
     * @param $string
     * @return EmojiInfo[]
     */
    public function detect($string) {
        $string = (string)$string;
        if($string === '') {
            return [];
        }

        // EmojiInfo may rely on the internal encoding for its own mb_* calls,
        // so force UTF-8 for the duration of the scan.
        $oldEncoding = mb_internal_encoding();
        mb_internal_encoding('UTF-8');

        try {
            /** @var EmojiInfo[] $emojiInfos */
            $emojiInfos = [];
            foreach($this->removeOverlaps($this->findMatches($string)) as $match) {
                $emojiInfos[] = $this->buildEmojiInfo($string, $match[0], $match[1]);
            }

            return $emojiInfos;
        } finally {
            // Restore the caller's encoding even if something above throws
            mb_internal_encoding($oldEncoding);
        }
    }

    /**
     * Tells whether the string consists of exactly one emoji and nothing else.
     *
     * @param $string
     * @return bool
     */
    public function isSingleEmoji($string) {
        $string = (string)$string;

        // Explicit encoding: the internal encoding is only forced to UTF-8 inside detect()
        if($string === '' || mb_strlen($string, 'UTF-8') > self::LONGEST_EMOJI) {
            return false;
        }

        $emojis = $this->detect($string);
        if(count($emojis) !== 1) {
            return false;
        }

        // With exactly one detected occurrence, the input is a single emoji
        // only when no other character surrounds that occurrence.
        return $emojis[0]->getEmoji() === $string;
    }

    /**
     * Collects every occurrence of every known emoji in the string.
     *
     * @param string $string
     * @return array[] List of [emoji, byteOffset] pairs, in no particular order
     */
    private function findMatches($string) {
        $matches = [];
        if(!is_array($this->regex)) {
            return $matches;
        }

        foreach($this->regex as $icon) {
            // An empty needle would match everywhere and never advance the search
            if(!is_string($icon) || $icon === '') {
                continue;
            }

            $iconLength = strlen($icon);
            $searchFrom = 0;
            while(($position = strpos($string, $icon, $searchFrom)) !== false) {
                $matches[] = [$icon, $position];
                $searchFrom = $position + $iconLength;
            }
        }

        return $matches;
    }

    /**
     * Orders matches by byte offset and drops every match that starts inside
     * a match that was already kept.
     *
     * @param array[] $matches List of [emoji, byteOffset] pairs
     * @return array[] Non-overlapping [emoji, byteOffset] pairs, ascending by offset
     */
    private function removeOverlaps(array $matches) {
        // Earliest offset first; at the same offset the longest emoji first,
        // so a full sequence is preferred over its own prefix.
        usort($matches, function(array $a, array $b) {
            if($a[1] !== $b[1]) {
                return $a[1] < $b[1] ? -1 : 1;
            }
            $lengthA = strlen($a[0]);
            $lengthB = strlen($b[0]);
            if($lengthA === $lengthB) {
                return 0;
            }
            return $lengthA > $lengthB ? -1 : 1;
        });

        $kept = [];
        $consumedUntil = 0;
        foreach($matches as $match) {
            if($match[1] < $consumedUntil) {
                continue;
            }
            $kept[] = $match;
            $consumedUntil = $match[1] + strlen($match[0]);
        }

        return $kept;
    }

    /**
     * Builds the EmojiInfo describing one occurrence of an emoji.
     *
     * @param string $string The string that was searched
     * @param string $icon The emoji that was found
     * @param int $byteOffset Byte offset of the occurrence inside $string
     * @return EmojiInfo
     */
    private function buildEmojiInfo($string, $icon, $byteOffset) {
        $emojiInfo = new EmojiInfo();
        $emojiInfo->setEmoji($icon);
        $emojiInfo->setOffset($byteOffset);
        // Character offset = number of characters in front of the occurrence
        $emojiInfo->setMbOffset(mb_strlen(substr($string, 0, $byteOffset), 'UTF-8'));

        $hexCodes = $this->toHexCodes($icon);
        $emojiInfo->setHexCodes($hexCodes);

        // Denote the emoji name
        $hexString = $emojiInfo->getHexString();
        if(is_array($this->map) && array_key_exists($hexString, $this->map)) {
            $entry = $this->map[$hexString];
            $emojiInfo->setName($entry['name']);
            $emojiInfo->setShortName($entry['shortName']);
            $emojiInfo->setCategory($entry['category']);
        }

        // Denote the skin tone (when several modifiers are present the last one wins)
        foreach($hexCodes as $hexCode) {
            if(array_key_exists($hexCode, self::SKIN_TONES)) {
                $emojiInfo->setSkinTone(self::SKIN_TONES[$hexCode]);
            }
        }

        return $emojiInfo;
    }

    /**
     * Converts an emoji into the list of its code points as upper-case hex.
     *
     * @param string $emoji
     * @return string[]
     */
    private function toHexCodes($emoji) {
        $characters = $this->str_split_unicode($emoji);
        if($characters === false) {
            return [];
        }

        $hexCodes = [];
        foreach($characters as $character) {
            $codePoint = $this->unicodeOrd($character);
            if($codePoint === false) {
                continue;
            }
            $hexCodes[] = strtoupper(dechex($codePoint));
        }

        return $hexCodes;
    }

    /**
     * Loads the emoji metadata map (map.json).
     *
     * @throws Exception
     */
    private function loadMap() {
        $this->map = $this->readJsonFile('map.json', 'map');
    }

    /**
     * Loads the list of raw emoji strings (raw.json).
     *
     * @throws Exception
     */
    private function loadRawEmojis() {
        $this->regex = $this->readJsonFile('raw.json', 'raw');
    }

    /**
     * Reads a JSON file from the data directory and decodes it to an array.
     *
     * @param string $fileName File name inside the data directory
     * @param string $label Word used in the error message ("map" or "raw")
     * @return array
     * @throws Exception when the file is missing, unreadable or does not decode to an array
     */
    private function readJsonFile($fileName, $label) {
        $path = $this->dataDir . '/' . $fileName;

        $contents = (is_file($path) && is_readable($path)) ? file_get_contents($path) : false;
        if($contents === false) {
            throw new Exception("Could not load Emoji " . $label . " file");
        }

        // json_decode() yields null on malformed input; failing here is
        // clearer than failing later in the middle of detect().
        $decoded = json_decode($contents, true);
        if(!is_array($decoded)) {
            throw new Exception("Could not parse Emoji " . $label . " file");
        }

        return $decoded;
    }

    /**
     * Decodes the first UTF-8 encoded character of the given bytes into its
     * code point.
     *
     * @param $hexChar
     * @return bool|int The code point, or false when the lead byte is not a
     *                  valid UTF-8 lead byte or the sequence is truncated
     */
    private function unicodeOrd($hexChar) {
        $hexChar = (string)$hexChar;
        $length = strlen($hexChar);
        if($length === 0) return false;

        $ord0 = ord($hexChar[0]);
        if($ord0 <= 127) return $ord0;

        if($ord0 >= 192 && $ord0 <= 223) {
            if($length < 2) return false;
            return ($ord0 - 192) * 64 + (ord($hexChar[1]) - 128);
        }

        if($ord0 >= 224 && $ord0 <= 239) {
            if($length < 3) return false;
            return ($ord0 - 224) * 4096 + (ord($hexChar[1]) - 128) * 64 + (ord($hexChar[2]) - 128);
        }

        if($ord0 >= 240 && $ord0 <= 247) {
            if($length < 4) return false;
            return ($ord0 - 240) * 262144 + (ord($hexChar[1]) - 128) * 4096 + (ord($hexChar[2]) - 128) * 64 + (ord($hexChar[3]) - 128);
        }

        return false;
    }

    /**
     * Splits a UTF-8 string into chunks of $l characters, or into single
     * characters when $l is 0.
     *
     * @param $str
     * @param int $l
     * @return false|string[] false when $l is 0 and preg_split() fails (e.g. on invalid UTF-8)
     */
    private function str_split_unicode($str, $l = 0) {
        if ($l > 0) {
            $ret = [];
            $len = mb_strlen($str, "UTF-8");
            for ($i = 0; $i < $len; $i += $l) {
                $ret[] = mb_substr($str, $i, $l, "UTF-8");
            }
            return $ret;
        }
        return preg_split("//u", $str, -1, PREG_SPLIT_NO_EMPTY);
    }
}