Etymology reader

Translates etymology shorthand abbreviations (see example).

Source code (hide)

Global files are not shown below.

Table of contents
  1. index.php

index.php

back to toc
<?php
/* globals, templates */
// Page metadata, set before the shared include below.
// NOTE(review): presumably read by ../globals/globals.php to render the page
// header and the source listing — confirm against that file.
$locals = array();
$locals['title']       = 'Etymology reader';
$locals['description'] = 'Translates etymology shorthand abbreviations (<a href="index.php?example=1" title="prefill with example">see example</a>).';
$locals['files']       = array('index.php');

// Shared site code; gDebug(), gDebugOption() and makeFooter(), which this
// script calls but does not define, are expected to come from here.
include '../globals/globals.php';

/* variables */
// input
// Both request values are optional: a plain GET of the page carries neither,
// so each one is read defensively instead of indexing the superglobal blindly.
// 'text' must be a string (a "text[]=..." post would otherwise hand an array
// to the string functions used later on).
$input = array(
    'text'    => (isset($_POST['text']) && is_string($_POST['text'])) ? $_POST['text'] : '',
    'example' => isset($_GET['example']) ? $_GET['example'] : null,
);

// Undo magic-quotes escaping only where it can actually have happened.
// The feature was removed in PHP 5.4; stripping slashes unconditionally on
// newer versions would silently delete genuine backslashes from the input.
// The version test short-circuits, so get_magic_quotes_gpc() (removed in
// PHP 8) is never called on versions that lack it.
if ($input['text'] !== '' && PHP_VERSION_ID < 50400 && get_magic_quotes_gpc()) {
    $input['text'] = stripslashes($input['text']);
}

// "?example=1" prefills the form with a sample etymology, replacing any posted text.
if ($input['example']) {
    $input['text'] = '< G Angst fear, anxiety, OHG angust (c. MLG angest, MD anxt), equiv. to ang- (akin to eng narrow, constricted) + -st abstract nominal suffix, perh. a conglomerate of a suffix *-os- + *-ti- suffix forming abstracts';
}

/**
 * Return the etymology shorthand table: regex fragment => expansion.
 *
 * Keys are PCRE fragments. A key that is already wrapped in "#" delimiters is
 * used as-is; every other key gets word-boundary anchors added by
 * etymologySearchPatterns().
 *
 * Mostly based on http://www.etymonline.com/abbr.php and the shorthand
 * definitions at http://dictionary.reference.com
 *
 * @return array map of pattern fragment (string) to replacement text (string)
 */
function etymologyAbbreviations()
{
    return array(
        '#(?:^| )< #'      => 'From ',
        '([0-9]+)c\.?'     => '$1th century',
        'abl\.'            => 'ablative',
        'acc\.'            => 'accusative',
        'adj\.'            => 'adjective',
        'adv\.'            => 'adverb',
        'alter\.'          => 'alteration',
        'Amer.Eng\.'       => 'American English',
        'Anglo-Fr\.?'      => 'Anglo-French',
        'Anglo-L\.?'       => 'Anglo-Latin',
        'Anglo-Norm\.?'    => 'Anglo-Norman',
        'Ar\.?'            => 'Arabic',
        'Assyr\.?'         => 'Assyrian',
        '#\bc\.? ?([0-9]+)#' => 'circa $1',
        'c\.?'             => 'compare',
        'caus\.?'          => 'causative',
        'Celt\.?'          => 'Celtic',
        'cf\.?'            => 'compare',
        'chem\.?'          => 'chemical',
        'comb\.?'          => 'combining',
        'comp\.?'          => 'comparative',
        'Dan\.?'           => 'Danish',
        'dat\.?'           => 'dative',
        'deriv\.?'         => 'derived',
        'dial\.?'          => 'dialectal',
        'dim\.?'           => 'diminutive',
        'Du\.?'            => 'Dutch',
        'E\.Fris\.'        => 'East Frisian',
        'e\.g\.'           => 'for example',
        'Egypt\.'          => 'Egyptian',
        'eng\.?'           => 'English',
        'esp\.'            => 'especially',
        'equiv\.'          => 'equivalent',
        'fem\.?'           => 'feminine',
        'Fl\.?'            => 'Flemish',
        'Fr?\.?'           => 'French',
        'Frank\.?'         => 'Frankish',
        'freq\.'           => 'frequentative',
        'Fris\.?'          => 'Frisian',
        'fut\.?'           => 'future tense',
        'Gael\.?'          => 'Gaelic',
        'Gaul\.?'          => 'Gaulish',
        'gen\.?'           => 'genitive',
        'G(?:er)?\.?'      => 'German',
        'ger\.?'           => 'gerund',
        'Goth\.'           => 'Gothic',
        'Gk\.?'            => 'Greek',
        'Gmc\.?'           => 'Germanic',
        'Heb\.?'           => 'Hebrew',
        'I\.?E\.?'         => 'Indo-European',
        'imper\.?'         => 'imperative',
        'indic\.?'         => 'indicative',
        'inf\.?'           => 'infinitive',
        'infl\.?'          => 'influenced',
        'intens\.?'        => 'intensive',
        'Ir\.?'            => 'Irish',
        'Iran\.'           => 'Iranian',
        'irreg\.?'         => 'irregular',
        'It\.'             => 'Italian', // was misspelled "Italien"
        'L\.?'             => 'Latin',
        'lit\.?'           => 'literally',
        'Lith\.?'          => 'Lithuanian',
        'L\.?L\.?'         => 'Late Latin',
        'Loan-transl\.?'   => 'loan-translation',
        'loc\.?'           => 'locative',
        'Low Ger\.?'       => 'Low German',
        'masc\.?'          => 'masculine',
        'M\.?Du?\.?'       => 'Middle Dutch',
        'M\.?E\.?'         => 'Middle English',
        'M\.?Fr?\.?'       => 'Middle French',
        'M\.?H\.?G'        => 'Middle High German',
        'M\.?L'            => 'Medieval Latin',
        'M\.?L\.?G\.?'     => 'Middle Low German',
        'Mod\.?Eng\.?'     => 'Modern English',
        'Mod\.?Gk\.?'      => 'Modern Greek',
        'Mod\.?L\.?'       => 'Modern Latin',
        'n\.'              => 'noun',
        'neut\.?'          => 'neuter',
        'N\.?Gmc\.?'       => 'North Germanic',
        'nom\.?'           => 'nominative',
        'Norm\.?'          => 'Norman',
        'North Sea Gmc\.?' => 'North Sea Germanic',
        'N\.?T\.?'         => 'New Testament',
        'obj\.?'           => 'objective',
        'obs\.?'           => 'obsolete',
        'O\.?Celt\.?'      => 'Old Celtic',
        'O\.?C\.?S\.?'     => 'Old Church Slavonic',
        'O\.?Dan\.?'       => 'Old Danish',
        'O\.?Du\.?'        => 'Old Dutch',
        'O\.?E\.?'         => 'Old English',
        'O\.?E\.?D\.?'     => 'Oxford English Dictionary',
        'O\.?Fr?\.?'       => 'Old French',
        'O\.?Fris\.?'      => 'Old Frisian',
        'O\.?H\.?G\.?'     => 'Old High German',
        'O\.?Ir\.?'        => 'Old Irish',
        'O\.?It\.?'        => 'Old Italian',
        'O\.?LowG\.?'      => 'Old Low German',
        'O\.?N\.?'         => 'Old Norse',
        'O\.?N\.?Fr\.?'    => 'Old North French',
        'O\.?Pers\.?'      => 'Old Persian',
        'O\.?Prov\.?'      => 'Old Provençal',
        'O\.?Prus\.?'      => 'Old Prussian',
        'orig\.?'          => 'originally',
        'O\.?S\.?'         => 'Old Saxon',
        'Osc\.?'           => 'Oscan',
        'O\.?Slav\.?'      => 'Old Slavic',
        'O\.?Sp\.?'        => 'Old Spanish',
        'O\.?Sw\.?'        => 'Old Swedish',
        'O\.?T\.?'         => 'Old Testament',
        'part\.'           => 'participle',
        'pass\.'           => 'passive',
        'perf\.?'          => 'perfective',
        'perh\.?'          => 'perhaps',
        'Pers\.?'          => 'Persian',
        'pers\.?'          => 'person',
        'P\.?Gmc\.?'       => 'Proto-Germanic',
        'Pg\.?'            => 'Portuguese',
        'Phoen\.?'         => 'Phoenician',
        'P\.?I\.?E\.?'     => 'Proto-Indo-European',
        'pl\.?'            => 'plural',
        'Pol\.?'           => 'Polish',
        'Port\.?'          => 'Portuguese',
        'poss\.?'          => 'possessive',
        'pp\.?'            => 'past participle',
        'prep\.?'          => 'preposition',
        'pres\.?'          => 'present tense',
        'pres\.?-pret\.?'  => 'present-preterite',
        'pret\.?'          => 'preterite',
        'priv\.?'          => 'privative',
        'prob\.?'          => 'probably',
        'pron\.?'          => 'pronoun',
        'prop\.?'          => 'properly',
        'Prov\.?'          => 'Provençal',
        'prp\.?'           => 'present participle',
        'ptp\.?'           => 'past participle',
        'pt\.?'            => 'past tense',
        'q\.?v\.?'         => 'quo vide',
        'redupl\.?'        => 'reduplicated',
        'refl\.?'          => 'reflexive',
        'Russ\.?'          => 'Russian',
        's\.?'             => 'singular',
        'Scand\.?'         => 'Scandinavian',
        'Scot\.?'          => 'Scottish',
        'Sem\.?'           => 'Semitic',
        'Serb\.?'          => 'Serbian',
        'sing\.?'          => 'singular',
        'Skt\.?'           => 'Sanskrit',
        'Slav\.?'          => 'Slavic',
        'Sp\.?'            => 'Spanish',
        'subj\.?'          => 'subject',
        'superl\.?'        => 'superlative',
        'Swed\.?'          => 'Swedish',
        'transl\.?'        => 'translation',
        'Turk\.?'          => 'Turkish',
        'ult\.?'           => 'ultimately',
        'uncert\.?'        => 'uncertain',
        'U\.?S\.?'         => 'United States',
        'v\.?'             => 'verb',
        'var\.?'           => 'variant',
        'V\.?L\.?'         => 'Vulgar Latin',
        'voc\.?'           => 'vocative',
        'W\.?Afr\.?'       => 'West African',
        'W\.?Fris\.?'      => 'West Frisian',
        'W\.?Gmc\.?'       => 'West Germanic',
        'W\.?Saxon'        => 'West Saxon',
    );
}

/**
 * Reorder a definition table so that longer pattern fragments come first.
 *
 * preg_replace() applies an array of patterns one after another, so a short
 * abbreviation listed before a longer one that contains it consumes part of
 * the longer form (e.g. "L." turned "L.L." into "LatinLatin", and "Fr."
 * turned "O.Fr." into "O.French"). Trying the longest fragments first avoids
 * that. Entries of equal length keep their original relative order.
 *
 * @param array $definitions map of pattern fragment => replacement
 * @return array the same entries, longest key first
 */
function etymologySortLongestFirst(array $definitions)
{
    $fragments = array_keys($definitions);
    $lengths   = array_map('strlen', $fragments);
    $positions = array_keys($fragments); // original index, used as tie-breaker

    array_multisort(
        $lengths, SORT_DESC, SORT_NUMERIC,
        $positions, SORT_ASC, SORT_NUMERIC,
        $fragments
    );

    $ordered = array();
    foreach ($fragments as $fragment) {
        $ordered[$fragment] = $definitions[$fragment];
    }
    return $ordered;
}

/**
 * Turn the keys of a definition table into complete PCRE patterns.
 *
 * Each fragment is wrapped as #\b...\b#, then adjusted:
 *  - a fragment ending in a mandatory dot ("\.") drops the trailing \b,
 *    because a word boundary after "." would require a word character next;
 *  - a fragment ending in an optional dot ("\.?") ends in (?:\.|\b) instead,
 *    i.e. either the dot or a word boundary;
 *  - a fragment that already carried its own "#" delimiters has the added
 *    wrapper stripped again, leaving the fragment exactly as written.
 *
 * @param array $definitions map of pattern fragment => replacement
 * @return array list of patterns, in the same order as the table's keys
 */
function etymologySearchPatterns(array $definitions)
{
    $patterns = array();
    foreach (array_keys($definitions) as $fragment) {
        $patterns[] = '#\b' . $fragment . '\b#';
    }

    $patterns = str_replace(
        array('\.\b#', '\.?\b#'),
        array('\.#', '(?:\.|\b)#'),
        $patterns
    );

    // Pre-delimited fragments now look like "#\b#...#\b#"; unwrap them.
    return str_replace('#\b#', '#', $patterns);
}

if (!empty($input['text'])) {
    // The three $regex entries are kept in global scope so they still show
    // up in the debug dump of defined variables.

    /* associative array of definitions (longest abbreviation first) */
    $regex['associative'] = etymologySortLongestFirst(etymologyAbbreviations());

    // build search array
    $regex['search'] = etymologySearchPatterns($regex['associative']);

    // build replace array (same order as the search array)
    $regex['replace'] = array_values($regex['associative']);

    // perform regex
    $output['text'] = preg_replace($regex['search'], $regex['replace'], $input['text']);
}

/* page content */
// debug
// Hands every variable defined so far to the shared debug helper.
gDebug(get_defined_vars());

// output form
// Shown only when a conversion produced non-empty text; $output is never
// assigned when no text was submitted, hence the empty() guard.
if (!empty($output['text'])) {
    echo '<h2>Generated text</h2>';
    // The text derives from user input, so it is escaped before being placed
    // inside the <textarea>, the same way the input form escapes its value.
    echo '<textarea readonly="readonly">' . htmlspecialchars($output['text']) . '</textarea>';
}

// input form
// Static HTML form that posts the "text" field back to index.php. The
// textarea is prefilled with the current input text (HTML-escaped), so a
// submitted or example text can be edited and converted again.
?>
    <h2>Input form</h2>
    <form action="index.php" method="post">
        <label for="text">Etymology text</label><br />
        <textarea
            id="text"
            name="text"
            rows="12"
            cols="100"><?php echo htmlspecialchars($input['text']); ?></textarea>
        <?php /* NOTE(review): gDebugOption() is defined outside this file; presumably it prints a debug toggle for the form — confirm. */ gDebugOption(); ?>
        <input type="submit" value="Convert" /><br />
        <input type="reset" value="Reset form" />
    </form>

<?php
/* globals, templates */
// makeFooter() is defined outside this file (expected from the shared globals
// include); presumably it emits the closing page template — confirm.
makeFooter();
?>

Input form




This tool is written and copyright by Jesse Plamondon-Willard (Pathoschild). You may freely use, distribute, and modify this script in any way and for any purpose, so long as you cite the above name as original author.