Etymology reader
Translates etymology shorthand abbreviations (see example).
Source code (hide)
Global files are not shown below.
Table of contents
index.php
back to toc
<?php
/* globals, templates */
// Page metadata, declared before the shared include below.
// NOTE(review): presumably read by globals.php to render the page header; confirm there.
$locals = array(
    'title' => 'Etymology reader',
    'description' => 'Translates etymology shorthand abbreviations (<a href="index.php?example=1" title="prefill with example">see example</a>).',
    'files' => array('index.php')
);
include('../globals/globals.php');

/* variables */
// input
// Absent (or non-string) request values fall back to empty defaults so that a plain
// GET of the page does not raise undefined-index notices.
$input = array(
    'text' => (isset($_POST['text']) && is_string($_POST['text'])) ? $_POST['text'] : '',
    'example' => isset($_GET['example']) ? $_GET['example'] : null
);

// Undo magic-quotes escaping only on interpreters that still have the feature and have
// it switched on. Stripping unconditionally would delete genuine backslashes from the
// submitted text. The version test runs first, so get_magic_quotes_gpc() is never
// called on PHP versions where it is deprecated or removed.
if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
    $input['text'] = stripslashes($input['text']);
}

// "?example=1" prefills the form with a sample etymology, replacing any submitted text.
if ($input['example']) {
    $input['text'] = '< G Angst fear, anxiety, OHG angust (c. MLG angest, MD anxt), equiv. to ang- (akin to eng narrow, constricted) + -st abstract nominal suffix, perh. a conglomerate of a suffix *-os- + *-ti- suffix forming abstracts';
}
// conversion regex
// mostly based on http://www.etymonline.com/abbr.php and shorthand definitions at http://dictionary.reference.com

/**
 * Converts one rule key of the abbreviation table into a delimited PCRE pattern.
 *
 * - A key that already starts with the "#" delimiter is a complete pattern and is
 *   returned unchanged.
 * - Any other key is anchored to a leading word boundary, and then:
 *   - a key ending in an optional period ("\.?") matches either that period or a
 *     trailing word boundary;
 *   - a key ending in a mandatory period ("\.") gets no trailing boundary;
 *   - every other key is closed with a trailing word boundary.
 *
 * @param string $key Rule key from the abbreviation table.
 * @return string Delimited pattern for preg_replace().
 */
function etymologyPattern($key)
{
    if (strpos($key, '#') === 0) {
        return $key;
    }
    if (substr($key, -3) === '\.?') {
        return '#\b' . substr($key, 0, -3) . '(?:\.|\b)#';
    }
    if (substr($key, -2) === '\.') {
        return '#\b' . $key . '#';
    }
    return '#\b' . $key . '\b#';
}

/**
 * Builds the abbreviation table and the parallel search/replace arrays derived from it.
 *
 * preg_replace() applies the patterns one after another, each to the result of the
 * previous one, so the ORDER OF THE TABLE IS SIGNIFICANT: a compound abbreviation
 * ("O.Fr.", "L.L.", "pres.-pret.") must be listed before the shorter abbreviations it
 * contains ("Fr.", "L.", "pres."), otherwise the component is expanded first and the
 * compound can no longer match. The table is therefore split into three sections:
 * structural/numeric rules, compound abbreviations, and simple abbreviations.
 *
 * @return array Array with the keys 'associative' (pattern key => expansion),
 *               'search' (delimited patterns) and 'replace' (expansions, same order).
 */
function etymologyRegex()
{
    /* associative array of definitions */
    $rules = array(
        /* structural and numeric rules */
        // the captured start-of-text/space is put back so that a mid-sentence "<" keeps its leading space
        '#(^| )< #' => '$1From ',
        // ordinal centuries: numbers ending in 1, 2 or 3 (but not 11, 12, 13) take st/nd/rd
        '((?:[0-9]*[02-9])?1)c\.?' => '$1st century',
        '((?:[0-9]*[02-9])?2)c\.?' => '$1nd century',
        '((?:[0-9]*[02-9])?3)c\.?' => '$1rd century',
        '([0-9]+)c\.?' => '$1th century',
        '#\bc\.? ?([0-9]+)#' => 'circa $1',

        /* compound abbreviations (must precede the simple abbreviations they contain) */
        'Amer\.Eng\.' => 'American English',
        'Anglo-Fr\.?' => 'Anglo-French',
        'Anglo-L\.?' => 'Anglo-Latin',
        'Anglo-Norm\.?' => 'Anglo-Norman',
        'E\.Fris\.' => 'East Frisian',
        'Loan-transl\.?' => 'loan-translation',
        'Low Ger\.?' => 'Low German',
        'L\.?L\.?' => 'Late Latin',
        'M\.?Du?\.?' => 'Middle Dutch',
        'M\.?E\.?' => 'Middle English',
        'M\.?Fr?\.?' => 'Middle French',
        'M\.?H\.?G\.?' => 'Middle High German',
        // "M.L.G." before "M.L.", which is a prefix of it
        'M\.?L\.?G\.?' => 'Middle Low German',
        'M\.?L\.?' => 'Medieval Latin',
        'Mod\.?Eng\.?' => 'Modern English',
        'Mod\.?Gk\.?' => 'Modern Greek',
        'Mod\.?L\.?' => 'Modern Latin',
        'N\.?Gmc\.?' => 'North Germanic',
        'North Sea Gmc\.?' => 'North Sea Germanic',
        'N\.?T\.?' => 'New Testament',
        'O\.?Celt\.?' => 'Old Celtic',
        'O\.?C\.?S\.?' => 'Old Church Slavonic',
        'O\.?Dan\.?' => 'Old Danish',
        'O\.?Du\.?' => 'Old Dutch',
        // "O.E.D." before "O.E.", which is a prefix of it
        'O\.?E\.?D\.?' => 'Oxford English Dictionary',
        'O\.?E\.?' => 'Old English',
        'O\.?Fr?\.?' => 'Old French',
        'O\.?Fris\.?' => 'Old Frisian',
        'O\.?H\.?G\.?' => 'Old High German',
        'O\.?Ir\.?' => 'Old Irish',
        'O\.?It\.?' => 'Old Italian',
        'O\.?LowG\.?' => 'Old Low German',
        // "O.N.Fr." before "O.N.", which is a prefix of it
        'O\.?N\.?Fr\.?' => 'Old North French',
        'O\.?N\.?' => 'Old Norse',
        'O\.?Pers\.?' => 'Old Persian',
        'O\.?Prov\.?' => 'Old Provençal',
        'O\.?Prus\.?' => 'Old Prussian',
        'O\.?S\.?' => 'Old Saxon',
        'O\.?Slav\.?' => 'Old Slavic',
        'O\.?Sp\.?' => 'Old Spanish',
        'O\.?Sw\.?' => 'Old Swedish',
        'O\.?T\.?' => 'Old Testament',
        'P\.?Gmc\.?' => 'Proto-Germanic',
        'P\.?I\.?E\.?' => 'Proto-Indo-European',
        'pres\.?-pret\.?' => 'present-preterite',
        'q\.?v\.?' => 'quo vide',
        'U\.?S\.?' => 'United States',
        'V\.?L\.?' => 'Vulgar Latin',
        'W\.?Afr\.?' => 'West African',
        'W\.?Fris\.?' => 'West Frisian',
        'W\.?Gmc\.?' => 'West Germanic',
        'W\.?Saxon' => 'West Saxon',

        /* simple abbreviations */
        'abl\.' => 'ablative',
        'acc\.' => 'accusative',
        'adj\.' => 'adjective',
        'adv\.' => 'adverb',
        'alter\.' => 'alteration',
        'Ar\.?' => 'Arabic',
        'Assyr\.?' => 'Assyrian',
        'c\.?' => 'compare',
        'caus\.?' => 'causative',
        'Celt\.?' => 'Celtic',
        'cf\.?' => 'compare',
        'chem\.?' => 'chemical',
        'comb\.?' => 'combining',
        'comp\.?' => 'comparative',
        'Dan\.?' => 'Danish',
        'dat\.?' => 'dative',
        'deriv\.?' => 'derived',
        'dial\.?' => 'dialectal',
        'dim\.?' => 'diminutive',
        'Du\.?' => 'Dutch',
        'e\.g\.' => 'for example',
        'Egypt\.' => 'Egyptian',
        'eng\.?' => 'English',
        'esp\.' => 'especially',
        'equiv\.' => 'equivalent',
        'fem\.?' => 'feminine',
        'Fl\.?' => 'Flemish',
        'Fr?\.?' => 'French',
        'Frank\.?' => 'Frankish',
        'freq\.' => 'frequentative',
        'Fris\.?' => 'Frisian',
        'fut\.?' => 'future tense',
        'Gael\.?' => 'Gaelic',
        'Gaul\.?' => 'Gaulish',
        'gen\.?' => 'genitive',
        'G(?:er)?\.?' => 'German',
        'ger\.?' => 'gerund',
        'Goth\.' => 'Gothic',
        'Gk\.?' => 'Greek',
        'Gmc\.?' => 'Germanic',
        'Heb\.?' => 'Hebrew',
        'I\.?E\.?' => 'Indo-European',
        'imper\.?' => 'imperative',
        'indic\.?' => 'indicative',
        'inf\.?' => 'infinitive',
        'infl\.?' => 'influenced',
        'intens\.?' => 'intensive',
        'Ir\.?' => 'Irish',
        'Iran\.' => 'Iranian',
        'irreg\.?' => 'irregular',
        'It\.' => 'Italian',
        'L\.?' => 'Latin',
        'lit\.?' => 'literally',
        'Lith\.?' => 'Lithuanian',
        'loc\.?' => 'locative',
        'masc\.?' => 'masculine',
        'n\.' => 'noun',
        'neut\.?' => 'neuter',
        'nom\.?' => 'nominative',
        'Norm\.?' => 'Norman',
        'obj\.?' => 'objective',
        'obs\.?' => 'obsolete',
        'orig\.?' => 'originally',
        'Osc\.?' => 'Oscan',
        'part\.' => 'participle',
        'pass\.' => 'passive',
        'perf\.?' => 'perfective',
        'perh\.?' => 'perhaps',
        'Pers\.?' => 'Persian',
        'pers\.?' => 'person',
        'Pg\.?' => 'Portuguese',
        'Phoen\.?' => 'Phoenician',
        'pl\.?' => 'plural',
        'Pol\.?' => 'Polish',
        'Port\.?' => 'Portuguese',
        'poss\.?' => 'possessive',
        'pp\.?' => 'past participle',
        'prep\.?' => 'preposition',
        'pres\.?' => 'present tense',
        'pret\.?' => 'preterite',
        'priv\.?' => 'privative',
        'prob\.?' => 'probably',
        'pron\.?' => 'pronoun',
        'prop\.?' => 'properly',
        'Prov\.?' => 'Provençal',
        'prp\.?' => 'present participle',
        'ptp\.?' => 'past participle',
        'pt\.?' => 'past tense',
        'redupl\.?' => 'reduplicated',
        'refl\.?' => 'reflexive',
        'Russ\.?' => 'Russian',
        's\.?' => 'singular',
        'Scand\.?' => 'Scandinavian',
        'Scot\.?' => 'Scottish',
        'Sem\.?' => 'Semitic',
        'Serb\.?' => 'Serbian',
        'sing\.?' => 'singular',
        'Skt\.?' => 'Sanskrit',
        'Slav\.?' => 'Slavic',
        'Sp\.?' => 'Spanish',
        'subj\.?' => 'subject',
        'superl\.?' => 'superlative',
        'Swed\.?' => 'Swedish',
        'transl\.?' => 'translation',
        'Turk\.?' => 'Turkish',
        'ult\.?' => 'ultimately',
        'uncert\.?' => 'uncertain',
        'v\.?' => 'verb',
        'var\.?' => 'variant',
        'voc\.?' => 'vocative'
    );

    return array(
        'associative' => $rules,
        // build search array
        'search' => array_map('etymologyPattern', array_keys($rules)),
        // build replace array
        'replace' => array_values($rules)
    );
}

// Only convert when there is text to work on; !empty() applies the same truthiness
// test as a bare condition without reading an undefined variable or index.
if (!empty($input['text'])) {
    $regex = etymologyRegex();
    // perform regex
    $output['text'] = preg_replace($regex['search'], $regex['replace'], $input['text']);
}
/* page content */
// debug
gDebug(get_defined_vars());

// output form
// $output only exists when there was text to convert, so test it with !empty()
// instead of reading a possibly undefined variable.
if (!empty($output['text'])) {
    echo '<h2>Generated text</h2>';
    // The converted text is derived from user input: escape it, as the input
    // textarea below already does, so markup in the text cannot break out of the element.
    echo '<textarea readonly="readonly">' . htmlspecialchars($output['text']) . '</textarea>';
}

// input form
?>
<h2>Input form</h2>
<form action="index.php" method="post">
<label for="text">Etymology text</label><br />
<textarea
id="text"
name="text"
rows="12"
cols="100"><?php echo htmlspecialchars($input['text']); ?></textarea>
<?php gDebugOption(); ?>
<input type="submit" value="Convert" /><br />
<input type="reset" value="Reset form" />
</form>
<?php
/* globals, templates */
makeFooter();
?>
Input form
This tool is written and copyright by Jesse Plamondon-Willard (Pathoschild). You may freely use, distribute, and modify this script in any way and for any purpose, so long as you cite the above name as original author.
