implement similarity algorithm for questtype "Submit"
This commit is contained in:
parent
67f92d6174
commit
53fda5caaf
6 changed files with 533 additions and 5 deletions
|
|
@ -24,7 +24,7 @@
|
|||
*
|
||||
* @var array
|
||||
*/
|
||||
public $models = array('quests', 'uploads', 'users');
|
||||
public $models = array('quests', 'uploads', 'users', 'characters', 'questgroups');
|
||||
|
||||
|
||||
|
||||
|
|
@ -210,6 +210,27 @@
|
|||
catch(\nre\exceptions\IdNotFoundException $e) {
|
||||
}
|
||||
}
|
||||
$submission['similar'] = $this->Submit->getSimilarSubmissions(
|
||||
$seminary['id'],
|
||||
$quest['id'],
|
||||
$character['id'],
|
||||
$submission['id']
|
||||
);
|
||||
foreach($submission['similar'] as &$similarSubmission)
|
||||
{
|
||||
$similarSubmission['quest'] = $this->Quests->getQuestById(
|
||||
$similarSubmission['quest_id']
|
||||
);
|
||||
$similarSubmission['questgroup'] = $this->Questgroups->getQuestgroupById(
|
||||
$similarSubmission['quest']['questgroup_id']
|
||||
);
|
||||
$similarSubmission['character'] = $this->Characters->getCharacterById(
|
||||
$similarSubmission['character_id']
|
||||
);
|
||||
$similarSubmission['upload'] = $this->Uploads->getSeminaryuploadById(
|
||||
$similarSubmission['upload_id']
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Status
|
||||
|
|
|
|||
|
|
@ -81,6 +81,12 @@
|
|||
$questId, $characterId, $uploadId
|
||||
);
|
||||
|
||||
// Index submission for similarity calculation
|
||||
$this->addDocument(
|
||||
$this->db->getInsertId(),
|
||||
ROOT.DS.\nre\configs\AppConfig::$dirs['seminaryuploads'].DS.$filename
|
||||
);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -163,6 +169,245 @@
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Get the submissions of other Characters for the same Quest whose
 * similarity to the given submission reaches a threshold.
 *
 * A similarity that is already stored for a pair of submissions is
 * reused. Missing similarities are calculated from the stored term
 * frequencies (TFs) and the Seminary-wide IDF counts, and are saved
 * afterwards. The IDF counts and the TFs of the reference submission
 * are only loaded when at least one similarity has to be calculated.
 *
 * @param	int	$seminaryId	ID of Seminary the IDF counts are taken from
 * @param	int	$questId	ID of Quest whose submissions are compared
 * @param	int	$characterId	ID of Character whose submissions are skipped
 * @param	int	$submissionId	ID of submission to compare the others to
 * @param	float	$threshold	Minimum similarity for a submission to be listed (default 0.7)
 * @return	array			Submissions with a similarity of at least $threshold
 */
public function getSimilarSubmissions($seminaryId, $questId, $characterId, $submissionId, $threshold=0.7)
{
	// List of submissions with high similarity
	$similarSubmissions = array();

	// Inputs of the calculation, loaded lazily (see below)
	$inputsLoaded = false;
	$idf_N = null;
	$idf_n = null;
	$tfsA = null;

	// Iterate through submissions of same task
	$submissions = $this->getSubmissionsForQuest(
		$questId,
		$characterId,
		$submissionId
	);
	foreach($submissions as $submission)
	{
		if(is_null($submission['similarity']))
		{
			// Load IDFs and TFs of the reference submission only once,
			// and only if there is something to calculate
			if(!$inputsLoaded)
			{
				$idf_N = $this->getIDF_total($seminaryId);
				$idf_n = $this->getIDF_docs($seminaryId);
				$tfsA = $this->getTFs($submissionId);
				$inputsLoaded = true;
			}

			// Get stored TFs of submission to compare to
			$tfsB = $this->getTFs($submission['id']);

			// Calculate similarity
			$submission['similarity'] = \hhu\z\lib\Similarity::compare(
				$tfsA,
				$tfsB,
				$idf_N,
				$idf_n
			);

			// Save similarity
			$this->setSimilarity(
				$submissionId,
				$submission['id'],
				$submission['similarity']
			);
		}

		// Add high similarities to list
		if($submission['similarity'] >= $threshold) {
			$similarSubmissions[] = $submission;
		}
	}


	return $similarSubmissions;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Index the document of a submission for the similarity calculation:
 * read the file, split its content into terms and store the terms for
 * the submission.
 *
 * @param	int	$submissionId	ID of submission the document belongs to
 * @param	string	$filename	Path of the file to read
 * @return	boolean			False if the document could not be read, true otherwise
 */
private function addDocument($submissionId, $filename)
{
	// Read document
	$document = \hhu\z\lib\Similarity::readDocument($filename);
	if($document === false) {
		return false;
	}

	// Split document into terms
	$terms = \hhu\z\lib\Similarity::splitNgrams($document);

	// Store terms and their frequencies for this submission
	$this->addTerms($submissionId, $terms);


	// Report success explicitly so that both outcomes are booleans
	return true;
}
|
||||
|
||||
|
||||
/**
 * Store the terms of a submission.
 *
 * Every distinct term is added to the global term table (existing
 * terms are left untouched by INSERT IGNORE). Every occurrence of a
 * term is then linked to the submission: the first occurrence creates
 * the link with a term frequency (tf) of 1, each further occurrence
 * increases tf by 1.
 *
 * @param	int	$submissionId	ID of submission the terms belong to
 * @param	array	$terms		List of terms, one entry per occurrence (bound as strings)
 */
private function addTerms($submissionId, $terms)
{
	// Terms already added during this call, stored as array keys.
	// A key lookup compares exactly, whereas a loose in_array() treats
	// numeric-like terms such as "1e3" and "1000" as equal and would
	// skip the insert (and thereby lose the link) for the second one.
	$seenTerms = array();
	foreach($terms as $term)
	{
		if(!isset($seenTerms[$term]))
		{
			// Add term to database
			$this->db->query(
				'INSERT IGNORE INTO questtypes_submit_terms '.
				'(term) '.
				'VALUES '.
				'(?)',
				's',
				$term
			);
			$seenTerms[$term] = true;
		}

		// Link term to submission
		$this->db->query(
			'INSERT INTO questtypes_submit_submissions_terms '.
			'(submission_id, term_id, tf) '.
			'SELECT ?, questtypes_submit_terms.id, 1 '.
			'FROM questtypes_submit_terms '.
			'WHERE term = ? '.
			'ON DUPLICATE KEY UPDATE '.
			'tf = tf + 1',
			'is',
			$submissionId,
			$term
		);
	}
}
|
||||
|
||||
|
||||
/**
 * Get the submissions for a Quest that do not belong to the given
 * Character, each with the similarity stored between it and the given
 * submission (the LEFT JOIN yields NULL if none is stored).
 *
 * @param	int	$questId	ID of Quest to get submissions for
 * @param	int	$characterId	ID of Character whose submissions are excluded
 * @param	int	$submissionId	ID of submission the stored similarities refer to
 * @return	array			Result of the database query
 */
private function getSubmissionsForQuest($questId, $characterId, $submissionId)
{
	$statement =
		'SELECT questtypes_submit_characters.id, questtypes_submit_characters.created, questtypes_submit_characters.quest_id, character_id, upload_id, questtypes_submit_similarities.similarity '.
		'FROM questtypes_submit_characters '.
		'LEFT JOIN questtypes_submit_similarities ON questtypes_submit_similarities.submission_id1 = ? AND questtypes_submit_similarities.submission_id2 = questtypes_submit_characters.id '.
		'WHERE quest_id = ? AND character_id != ?';

	// Placeholder order: submission (join), Quest, Character
	$rows = $this->db->query($statement, 'iii', $submissionId, $questId, $characterId);

	return $rows;
}
|
||||
|
||||
|
||||
/**
 * Get the stored term frequencies (TFs) of a submission.
 *
 * @param	int	$submissionId	ID of submission to read the terms of
 * @return	array			Map of term => tf
 */
private function getTFs($submissionId)
{
	$statement =
		'SELECT term, tf '.
		'FROM questtypes_submit_submissions_terms '.
		'INNER JOIN questtypes_submit_terms ON questtypes_submit_terms.id = questtypes_submit_submissions_terms.term_id '.
		'WHERE submission_id = ?';
	$rows = $this->db->query($statement, 'i', $submissionId);

	// Re-key the result rows by term
	$frequencies = array();
	foreach($rows as $row)
	{
		$frequencies[$row['term']] = $row['tf'];
	}

	return $frequencies;
}
|
||||
|
||||
|
||||
/**
 * Get the IDF value N: the total count of submissions of a Seminary.
 *
 * Submissions are assigned to a Seminary through the Charactertype of
 * the submitting Character.
 *
 * @param	int	$seminaryId	ID of Seminary to count submissions of
 * @return	mixed			Count as delivered by the database, 0 for an empty result
 */
private function getIDF_total($seminaryId)
{
	$statement =
		'SELECT count(questtypes_submit_characters.id) as c '.
		'FROM charactertypes '.
		'INNER JOIN characters ON characters.charactertype_id = charactertypes.id '.
		'INNER JOIN questtypes_submit_characters ON questtypes_submit_characters.character_id = characters.id '.
		'WHERE charactertypes.seminary_id = ?';
	$rows = $this->db->query($statement, 'i', $seminaryId);

	// No result row: nothing to count
	if(empty($rows))
	{
		return 0;
	}

	return $rows[0]['c'];
}
|
||||
|
||||
|
||||
/**
 * Get the IDF values n: for each term, the count of term links of
 * submissions of a Seminary, i.e. the rows of
 * questtypes_submit_submissions_terms grouped by term.
 *
 * @param	int	$seminaryId	ID of Seminary to count for
 * @return	array			Map of term => count
 */
private function getIDF_docs($seminaryId)
{
	$statement =
		'SELECT questtypes_submit_terms.term, count(*) AS c '.
		'FROM charactertypes '.
		'INNER JOIN characters ON characters.charactertype_id = charactertypes.id '.
		'INNER JOIN questtypes_submit_characters ON questtypes_submit_characters.character_id = characters.id '.
		'INNER JOIN questtypes_submit_submissions_terms ON questtypes_submit_submissions_terms.submission_id = questtypes_submit_characters.id '.
		'INNER JOIN questtypes_submit_terms ON questtypes_submit_terms.id = questtypes_submit_submissions_terms.term_id '.
		'WHERE charactertypes.seminary_id = ? '.
		'GROUP BY questtypes_submit_terms.term';
	$rows = $this->db->query($statement, 'i', $seminaryId);

	// Re-key the result rows by term
	$counts = array();
	foreach($rows as $row)
	{
		$counts[$row['term']] = $row['c'];
	}

	return $counts;
}
|
||||
|
||||
|
||||
/**
 * Save the similarity of two submissions.
 *
 * The value is written for both orders of the pair (1 -> 2 and
 * 2 -> 1); an already stored value is overwritten.
 *
 * @param	int	$submissionId1	ID of first submission
 * @param	int	$submissionId2	ID of second submission
 * @param	float	$similarity	Similarity value to store
 */
private function setSimilarity($submissionId1, $submissionId2, $similarity)
{
	$statement =
		'INSERT INTO questtypes_submit_similarities '.
		'(submission_id1, submission_id2, similarity) '.
		'VALUES '.
		'(?, ?, ?) '.
		'ON DUPLICATE KEY UPDATE '.
		'similarity = ?';

	// Same statement for both orders of the pair
	$pairs = array(
		array($submissionId1, $submissionId2),
		array($submissionId2, $submissionId1)
	);
	foreach($pairs as $pair)
	{
		$this->db->query(
			$statement,
			'iidd',
			$pair[0], $pair[1], $similarity,
			$similarity
		);
	}
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
|
|
|||
|
|
@ -16,6 +16,24 @@
|
|||
<?php endforeach ?>
|
||||
</ol>
|
||||
<?php endif ?>
|
||||
<?php if(!empty($submission['similar'])) : ?>
<h4><?=_('Similar submissions')?></h4>
<?php /* One list for all similar submissions; every label goes through _() so it can be translated. */ ?>
<ul>
	<?php foreach($submission['similar'] as $similar) : ?>
	<li>
		<p><small><?=_('Similarity')?>: <?=$numberFormatter->format($similar['similarity'])?></small></p>
		<p><a href="<?=$linker->link(array('uploads','seminary',$seminary['url'], $similar['upload']['url']))?>"><?=$similar['upload']['name']?></a></p>
		<p><small>
			<a href="<?=$linker->link(array('quests','submission',$seminary['url'],$similar['questgroup']['url'],$similar['quest']['url'],$similar['character']['url']))?>">
				<?=$similar['character']['name']?>,
				<?=$similar['quest']['title']?>
			</a>,
			<?=$dateFormatter->format(new \DateTime($similar['created']))?> <?=$timeFormatter->format(new \DateTime($similar['created']))?>
		</small></p>
	</li>
	<?php endforeach ?>
</ul>
<?php endif ?>
|
||||
</li>
|
||||
<?php endforeach ?>
|
||||
</ol>
|
||||
|
|
@ -24,10 +42,17 @@
|
|||
<form method="post" class="logreg">
|
||||
<?php $submission = array_pop($submissions); ?>
|
||||
<?php if(!$solved) : ?>
|
||||
<?=_('Comment')?><br />
|
||||
<textarea name="characterdata[comment]"></textarea><br />
|
||||
<input type="hidden" name="characterdata[submission_id]" value="<?=$submission['id']?>" />
|
||||
<fieldset>
|
||||
<legend><?=_('Comment')?></legend>
|
||||
<textarea id="characterdata-comment" name="characterdata[comment]"></textarea><br />
|
||||
<input type="hidden" name="characterdata[submission_id]" value="<?=$submission['id']?>" />
|
||||
</fieldset>
|
||||
<input type="submit" name="submit" value="<?=_('solved')?>" />
|
||||
<input type="submit" name="submit" value="<?=_('unsolved')?>" />
|
||||
<?php endif ?>
|
||||
</form>
|
||||
<script>
|
||||
$(function() {
|
||||
$("#characterdata-comment").markItUp(mySettings);
|
||||
});
|
||||
</script>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue