<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Russian Journal of Linguistics</journal-id><journal-title-group><journal-title xml:lang="en">Russian Journal of Linguistics</journal-title><trans-title-group xml:lang="ru"><trans-title>Russian Journal of Linguistics</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2687-0088</issn><issn publication-format="electronic">2686-8024</issn><publisher><publisher-name xml:lang="en">Peoples’ Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">31330</article-id><article-id pub-id-type="doi">10.22363/2687-0088-30122</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Articles</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Статьи</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="zh"><subject>Articles</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">A cognitive linguistic approach to analysis and correction of orthographic errors</article-title><trans-title-group xml:lang="ru"><trans-title>Лингвокогнитивный подход к классификации и исправлению орфографических ошибок</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-0306-087X</contrib-id><name-alternatives><name xml:lang="en"><surname>Reynolds</surname><given-names>Robert</given-names></name><name 
xml:lang="ru"><surname>Рейнольдс</surname><given-names>Роберт</given-names></name></name-alternatives><bio xml:lang="en"><p>employed as Assistant Research Professor in the Office of Digital Humanities</p></bio><bio xml:lang="ru"><p>доцент-исследователь в Отделе цифровых гуманитарных наук</p></bio><email>robert_reynolds@byu.edu</email><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-5047-1909</contrib-id><name-alternatives><name xml:lang="en"><surname>Janda</surname><given-names>Laura</given-names></name><name xml:lang="ru"><surname>Янда</surname><given-names>Лора</given-names></name></name-alternatives><bio xml:lang="en"><p>Professor of Russian in the Department of Language and Culture</p></bio><bio xml:lang="ru"><p>профессор кафедры языка и культуры</p></bio><email>laura.janda@uit.no</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1308-3506</contrib-id><name-alternatives><name xml:lang="en"><surname>Nesset</surname><given-names>Tore</given-names></name><name xml:lang="ru"><surname>Нессет</surname><given-names>Торе</given-names></name></name-alternatives><bio xml:lang="en"><p>Professor of Russian linguistics in the Department of Language and Culture</p></bio><bio xml:lang="ru"><p>профессор кафедры языка и культуры</p></bio><email>tore.nesset@uit.no</email><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">UiT The Arctic University of Norway</institution></aff><aff><institution xml:lang="ru">Университет Тромсё - Арктический университет Норвегии</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Brigham Young University</institution></aff><aff><institution xml:lang="ru">Университет Бригама 
Янга</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2022-06-29" publication-format="electronic"><day>29</day><month>06</month><year>2022</year></pub-date><volume>26</volume><issue>2</issue><issue-title xml:lang="en">Computational Linguistics and Discourse Complexology</issue-title><issue-title xml:lang="ru">Компьютерная лингвистика и дискурсивная комплексология</issue-title><fpage>391</fpage><lpage>408</lpage><history><date date-type="received" iso-8601-date="2022-06-29"><day>29</day><month>06</month><year>2022</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2022, Reynolds R., Janda L., Nesset T.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2022, Рейнольдс Р., Янда Л., Нессет Т.</copyright-statement><copyright-statement xml:lang="zh">Copyright © 2022, Reynolds R., Janda L., Nesset T.</copyright-statement><copyright-year>2022</copyright-year><copyright-holder xml:lang="en">Reynolds R., Janda L., Nesset T.</copyright-holder><copyright-holder xml:lang="ru">Рейнольдс Р., Янда Л., Нессет Т.</copyright-holder><copyright-holder xml:lang="zh">Reynolds R., Janda L., Nesset T.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.rudn.ru/linguistics/article/view/31330">https://journals.rudn.ru/linguistics/article/view/31330</self-uri><abstract xml:lang="en"><p style="text-align: justify;">In this paper, we apply usage-based linguistic analysis to systematize the inventory of orthographic errors observed in the writing of non-native users of Russian. The data comes from a longitudinal corpus (560K tokens) of non-native academic writing. Traditional spellcheckers mark errors and suggest corrections, but do not attempt to model why errors are made. 
Our approach makes it possible to recognize not only the errors themselves, but also the conceptual causes of these errors, which lie in misunderstandings of Russian phonotactics and morphophonology and the way they are represented by orthographic conventions. With this linguistically-based system in place, we can propose targeted grammar explanations that improve users’ command of Russian morphophonology rather than merely correcting errors. Based on errors attested in the non-native academic writing corpus, we introduce a taxonomy of errors, organized by pedagogical domains. Then, on the basis of this taxonomy, we create a set of mal-rules to expand an existing finite-state analyzer of Russian. The resulting morphological analyzer tags wordforms that fit our taxonomy with specific error tags. For each error tag, we also develop an accompanying grammar explanation to help users understand why and how to correct the diagnosed errors. Using our augmented analyzer, we build a webapp to allow users to type or paste a text and receive detailed feedback and correction on common Russian morphophonological and orthographic errors.</p></abstract><trans-abstract xml:lang="ru"><p style="text-align: justify;">В представленной статье мы предлагаем систематизацию орфографических ошибок неносителей русского языка на основе лингвистических и когнитивных критериев. Материалом исследования послужили данные лонгитюдного корпуса (560000 слов) работ на русском языке, написанных студентами-иностранцами. Традиционные автоматические средства проверки орфографии (spell checkers) выявляют ошибки и предлагают исправления, но не могут построить объяснительные когнитивные модели. Предлагаемый подход позволяет распознать не только сами ошибки, но и концептуальные причины этих ошибок, заключающиеся в непонимании фонотактики и морфофонологии русского языка, а также в способах их репрезентации орфографическими правилами. 
Этот способ позволяет обосновывать причины грамматических ошибок и рекомендовать правила, которые улучшают владение пользователями русской морфофонологией, а не просто исправляют ошибки. Принцип систематизации аннотированных ошибок в корпусе академического письма на неродном языке и таксономия ошибок ориентированы на преподавание. На основе представленной таксономии мы разработали набор правил (mal-rules), расширяющих функционал конечно-автоматного анализатора русского языка. Разработанный морфологический анализатор аннотирует словоформы специальными тегами ошибок. Для каждого тега ошибки мы предлагаем сопровождающее пояснение, чтобы помочь пользователям понять, почему и как исправить диагностированные ошибки. Используя наш расширенный анализатор, мы создаем веб-приложение, позволяющее пользователям набирать или вставлять текст, а также получать подробные комментарии и исправления распространенных морфофонологических и орфографических ошибок в русском языке.</p></trans-abstract><kwd-group xml:lang="en"><kwd>morphophonology</kwd><kwd>phonotactics</kwd><kwd>orthography</kwd><kwd>corpus</kwd><kwd>error taxonomy</kwd><kwd>webapp</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>морфофонология</kwd><kwd>фонотактика</kwd><kwd>орфография</kwd><kwd>корпус</kwd><kwd>таксономия ошибок</kwd></kwd-group><funding-group/></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Amaral, Luiz &amp; Detmar Meurers. 2011. On using intelligent computer-assisted language learning in real-life foreign language teaching and learning. ReCALL 23(1). 4-24.</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Beesley, Kenneth R. &amp; Lauri Karttunen. 2003. Finite State Morphology. Stanford, CA: CSLI Publications.</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Biggs, John &amp; Catherine Tang. 2011. Teaching for Quality Learning at University. 
Maidenhead, UK: Open University Press.</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Biggs, John. 1999. What the student does: Teaching for enhanced learning. Higher Education Research &amp; Development 18(1). 57-75.</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Bocharov, Victor, Svetlana Alexeeva, Dmitry Granovsky, E. Protopopova, Anastasia Bodrova, Svetlana Volskaya, I.V. Krylova &amp; A.S. Chuchunkov. 2013. Crowdsourcing morphological annotations. In Computational Linguistics and Intellectual Technologies: Papers from the Annual International Conference "Dialog" 1. http://opencorpora.org/doc/articles/2013_Dialog.pdf (accessed 20.04.2022).</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Choi, Inn-Chull. 2016. Efficacy of an ICALL tutoring system and process-oriented corrective feedback. Computer Assisted Language Learning 29. 334-364.</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Heift, Trude. 2010. Developing an Intelligent Language Tutor. CALICO Journal 27(3). 443-459.</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Kopotev, Mixail, Sardana Ivanova, Anisia Katinskaia &amp; Roman Yangarber. 2019. Corpus-based language teaching tool. Trudy Meždunarodnii Konferencii «KORPUSNAYA LINGVISTIKA-2019». 30-39. (In Russ.)</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Korobov, Mikhail. 2015. Morphological analyzer and generator for Russian and Ukrainian languages. In Proceedings of AIST’2015. 320-332. New York: Springer.</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Krylov, Sergej &amp; Sergej Starostin. 2003. Upcoming tasks for morphological analysis and generation in the integrated information environment STARLING. In Proceedings of the International Conference “Dialog 2003”. https://www.dialog-21.ru/media/2655/krylov.pdf (In Russ.) 
(accessed 20.04.22).</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Linden, Krister, Erik Axelson, Sam Hardwick &amp; Tommi A. Pirinen. 2011. HFST — framework for compiling and applying morphologies. In Cerstin Mahlow &amp; Michael Piotrowski (eds.), Systems and frameworks for computational morphology, vol. 100 of Communications in Computer and Information Science, 67-85. New York: Springer.</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Matthews, Clive. 1992. Going AI: Foundations of ICALL. Computer Assisted Language Learning 5(1). 13-31.</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>Matthews, Clive. 1992. Going AI: Foundations of ICALL. Computer Assisted Language Learning 5(1). 13-31.</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Meurers, Detmar, Kordula De Kuthy, Florian Nuxoll, Björn Rudzewitz &amp; Ramon Ziai. 2019. Scaling up intervention studies to investigate real-life foreign language learning in school. Annual Review of Applied Linguistics 39.</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Nagata, Noriko. 2009. Robo-Sensei’s NLP-Based Error detection and feedback generation. CALICO Journal 26(3). 562-579.</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Rozovskaya, Alla &amp; Dan Roth. 2019. Grammar Error Correction in Morphologically Rich Languages: The Case of Russian. Transactions of the Association for Computational Linguistics 7. 1-17. https://doi.org/10.1162/tacl_a_00251</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Rozovskaya, Alla &amp; Dan Roth. 2021. How Good (really) are Grammatical Error Correction Systems? In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume. 2686-2698.</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Segalovich, Ilya. 2003. 
A fast morphological algorithm with unknown word guessing induced by a dictionary for a web search engine. In International Conference on Machine Learning; Models, Technologies and Applications. 273-280.</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Sleeman, Derek. 1982. Inferring (mal) rules from pupil’s protocols. In Proceedings of the 5th European Conference on Artificial Intelligence (ECAI). 160-164. Orsay, France.</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Vilkki, Liisa. 2005. RUSTWOL: A tool for automatic Russian word form recognition. In Antti Arppe, Lauri Carlson, Krister Lindén, Jussi Piitulainen, Mickael Suominen, Martti Vainio, Hanna Westerlund &amp; Anssi Yli-Jyrä (eds.), Inquiries into words, constraints and contexts: Festschrift for Kimmo Koskenniemi on his 60th Birthday, 151-162. Stanford, CA: CSLI Publications.</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Vilkki, Liisa. 1997. RUSTWOL: A System for Automatic Recognition of Russian Words. Technical report, Lingsoft, Inc.</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Vilkki, Liisa. 2005. RUSTWOL: A tool for automatic Russian word form recognition. In Arppe, A., Carlson, L., Lindén, K., Piitulainen, J., Suominen, M., Vainio, M., Westerlund, H., and Yli-Jyrä, A. (eds.), Inquiries into Words, Constraints and Contexts: Festschrift for Kimmo Koskenniemi on his 60th Birthday, 151-162. CSLI Publications.</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Zaliznjak, Andrej A. 1977. Grammatical dictionary of the Russian language: Inflection: Approx 100 000 words. Russkij Jazyk. (In Russ.)</mixed-citation></ref></ref-list></back></article>
