<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">RUDN Journal of Language Studies, Semiotics and Semantics</journal-id><journal-title-group><journal-title xml:lang="en">RUDN Journal of Language Studies, Semiotics and Semantics</journal-title><trans-title-group xml:lang="ru"><trans-title>Вестник Российского университета дружбы народов. Серия: Теория языка. Семиотика. Семантика</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2313-2299</issn><issn publication-format="electronic">2411-1236</issn><publisher><publisher-name xml:lang="en">Peoples’ Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">38629</article-id><article-id pub-id-type="doi">10.22363/2313-2299-2024-15-1-195-210</article-id><article-id pub-id-type="edn">FKVAOI</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>DISCOURSE &amp; CORPUS STUDIES</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>ДИСКУРСИВНЫЕ И КОРПУСНЫЕ ИССЛЕДОВАНИЯ</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Natural Language Processing and Fiction Text: Basis for Corpus Research</article-title><trans-title-group xml:lang="ru"><trans-title>Обработка естественного языка и художественный текст: база для корпусного исследования</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id 
contrib-id-type="orcid">https://orcid.org/0000-0003-2280-1282</contrib-id><contrib-id contrib-id-type="spin">1753-4920</contrib-id><name-alternatives><name xml:lang="en"><surname>Gorozhanov</surname><given-names>Alexey I.</given-names></name><name xml:lang="ru"><surname>Горожанов</surname><given-names>Алексей Иванович</given-names></name></name-alternatives><bio xml:lang="en"><p>Dr.Sc. in Philology, Associate Professor, Professor of the Department of Grammar and History of the German Language, Faculty of German Language</p></bio><bio xml:lang="ru"><p>доктор филологических наук, доцент, профессор кафедры грамматики и истории немецкого языка, факультет немецкого языка</p></bio><email>a_gorozhanov@mail.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-6544-699X</contrib-id><contrib-id contrib-id-type="spin">1635-5260</contrib-id><name-alternatives><name xml:lang="en"><surname>Guseynova</surname><given-names>Innara A.</given-names></name><name xml:lang="ru"><surname>Гусейнова</surname><given-names>Иннара Алиевна</given-names></name></name-alternatives><bio xml:lang="en"><p>Dr.Sc. 
in Philology, Associate Professor, Vice-Rector</p></bio><bio xml:lang="ru"><p>доктор филологических наук, доцент, проректор</p></bio><email>guseynova@linguanet.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-2857-4386</contrib-id><contrib-id contrib-id-type="spin">5291-8660</contrib-id><name-alternatives><name xml:lang="en"><surname>Stepanova</surname><given-names>Darya V.</given-names></name><name xml:lang="ru"><surname>Степанова</surname><given-names>Дарья Валерьевна</given-names></name></name-alternatives><bio xml:lang="en"><p>PhD in Philology, Associate Professor</p></bio><bio xml:lang="ru"><p>кандидат филологических наук, доцент</p></bio><email>daryastepanova79@gmail.com</email><xref ref-type="aff" rid="aff2"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">Moscow State Linguistic University</institution></aff><aff><institution xml:lang="ru">Московский государственный лингвистический университет</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Minsk State Linguistic University</institution></aff><aff><institution xml:lang="ru">Минский государственный лингвистический университет</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2024-03-15" publication-format="electronic"><day>15</day><month>03</month><year>2024</year></pub-date><volume>15</volume><issue>1</issue><issue-title xml:lang="en">VOL 15, NO1 (2024)</issue-title><issue-title xml:lang="ru">ТОМ 15, №1 (2024)</issue-title><fpage>195</fpage><lpage>210</lpage><history><date date-type="received" iso-8601-date="2024-04-05"><day>05</day><month>04</month><year>2024</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2024, Gorozhanov A.I., Guseynova I.A., Stepanova D.V.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2024, Горожанов А.И., Гусейнова 
И.А., Степанова Д.В.</copyright-statement><copyright-year>2024</copyright-year><copyright-holder xml:lang="en">Gorozhanov A.I., Guseynova I.A., Stepanova D.V.</copyright-holder><copyright-holder xml:lang="ru">Горожанов А.И., Гусейнова И.А., Степанова Д.В.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.rudn.ru/semiotics-semantics/article/view/38629">https://journals.rudn.ru/semiotics-semantics/article/view/38629</self-uri><abstract xml:lang="en"><p style="text-align: justify;">The study deals with NLP procedures on the material of the fiction texts in German and in English, which are considered as strong cultural texts. The aim of the study is to develop a model of such a technical device to process, analyze and interpret a fiction text, which would reveal the full potential of popular NLP tools within the corpus approach. The general methods used in the study are analysis and synthesis. Special methods are additionally used to solve certain specific issues: descriptive method, modelling and qualitative and quantitative analysis. The scientific novelty lies in the fact that the authors apply the crucial principles of the classical theories of text interpretation according to the latest methods and tools of the applied linguistics. As a practical result, special software has been developed, which is able to process SQL based linguistic corpora, automatically built with spaCy NLP library and Python programming language. This software can be used for a fiction text interpretation, as well as for compiling learning materials in Home Reading. 
It is assumed that the development of special software for strong cultural texts stimulates the search for scientific solutions and at the same time allows one to understand the essential differences that exist between natural and artificial intelligence.</p></abstract><trans-abstract xml:lang="ru"><p style="text-align: justify;">Рассматриваются процедуры обработки естественного языка (NLP) на материале художественных текстов на немецком и английском языках, которые рассматриваются как сильные культурные тексты. Целью исследования является разработка модели такого инструмента обработки, анализа и интерпретации художественного текста, который раскрывал бы весь потенциал популярных инструментов NLP в рамках корпусного подхода. Общими методами, используемыми в исследовании, являются анализ и синтез. Для решения отдельных задач дополнительно применяются специальные методы: описательный метод, моделирование и качественно-количественный анализ. Научная новизна заключается в том, что авторы совмещают основополагающие принципы «классической» теории интерпретации текста и новейшие методы и инструменты прикладной лингвистики. В результате было разработано специальное программное обеспечение, способное работать с лингвистическими корпусами на основе баз данных SQL, автоматически построенными с помощью библиотеки spaCy и языка программирования Python. Созданное приложение можно использовать для интерпретации художественного текста, а также для составления учебных материалов для дисциплины «Домашнее чтение». Предполагается, что разработка специального программного обеспечения для сильных культурных текстов стимулирует поиск научных решений и в то же время позволит понять существенные различия, существующие между естественным и искусственным интеллектом.</p></trans-abstract><kwd-group xml:lang="en"><kwd>natural language processing</kwd><kwd>fiction text</kwd><kwd>linguistic corpus</kwd><kwd>F. Kafka</kwd><kwd>J. 
London</kwd><kwd>applied linguistics</kwd><kwd>spaCy</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>обработка естественного языка</kwd><kwd>художественный текст</kwd><kwd>лингвистический корпус</kwd><kwd>Ф. Кафка</kwd><kwd>Дж. Лондон</kwd><kwd>прикладная лингвистика</kwd></kwd-group><funding-group/></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Tsujii, J. (2021). Natural language processing and computational linguistics. Computational Linguistics, 47(4), 707-727. https://doi.org/10.1162/COLI_a_00420</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>O’Neill, H., Welsh, A., Smith, D.A., Roe, G. &amp; Terras, M. (2021). Text mining mill: Computationally detecting influence in the writings of John Stuart Mill from library records. Digital Scholarship in the Humanities, 36(4), 1013-1029. https://doi.org/10.1093/llc/fqab010</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Fonseca, C.A., Guelpeli, M.V.C. &amp; De Souza Netto, R.S. (2021). Representation of structured data of the text genre as a technique for automatic text processing. Texto Livre, 15. https://doi.org/10.35699/1983-3652.2022.35445</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Szabó, M.K., Ring, O., Nagy, B., Kiss, L., Koltai, J., Berend, G. &amp; Kmetty, Z. (2020). Exploring the dynamic changes of key concepts of the Hungarian socialist era with natural language processing methods. Historical Methods, 54(1), 1-13. https://doi.org/10.1080/01615440.2020.1823289</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Malyuga, E.N. &amp; McCarthy, M. (2021). “No” and “net” as response tokens in English and Russian business discourse: In search of a functional equivalence. Russian Journal of Linguistics, 25(2), 391-416. https://doi.org/10.22363/2687-0088-2021-25-2-391-416</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Gorozhanov, A.I. 
&amp; Guseynova, I.A. (2020). Corpus analysis of the grammatical categories’ constituents in fiction texts considering the linguo-regional component. Journal of Siberian Federal University. Humanities &amp; Social Sciences, 13(12), 2035-2048. https://doi.org/10.17516/1997-1370-0702. (In German).</mixed-citation></ref><ref id="B7"><label>7.</label><citation-alternatives><mixed-citation xml:lang="en">Denisova, G.V. (2020). Intertekst v sovremennoj sociokul’turnoj real’nosti Rossii i Italii. Moscow: Kanon+. P. 272. (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Денисова Г.В. Интертекст в современной социокультурной реальности России и Италии. М.: Kanon+, 2020. С. 272.</mixed-citation></citation-alternatives></ref><ref id="B8"><label>8.</label><mixed-citation>Milne, P.W. (2022). Praescriptum: Kafka’s two bodies. Philosophy Today, 66(3), 587-603. https://doi.org/10.5840/philtoday2022324451</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Itkin, A. (2021). Kafka’s worlds. German Quarterly, 94(4), 493-508. https://doi.org/10.1111/gequ.12241</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Roca, J.B. &amp; Rius, N.I. (2020). Kafka and disease. between reality and writing [Kafka y la enfermedad. Entre la realidad y la escritura] Revista Chilena De Literatura, 102, 233-247. https://doi.org/10.4067/S0718-22952020000200223</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Logue, M. (2022). Patrick MacGill: A path to socialism shared with Jack London. [Patrick MacGill: el Camino hacia el Socialismo junto a Jack London]. Estudios Irlandeses, 17, 54-64. https://doi.org/10.24162/EI2022-10645</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Hernandez, A. (2021). Jack London’s poetic animality and the problem of domestication. Journal of Modern Literature, 45(1), 40-55. 
https://doi.org/10.2979/jmodelite.45.1.03</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>López, J.I.G. (2020). Jack London, the socialist dream of a young poet. Revista De Estudios Norteamericanos, 24, 9-112. https://doi.org/10.12795/REN.2020.I24.05</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Li, J., Lian, Z., Wu, Z., Zeng, L., Mu, L., Yuan, Y. &amp; Ye, J. (2023). Artificial intelligence-based method for the rapid detection of fish parasites (ichthyophthirius multifiliis, gyrodactylus kobayashii, and argulus japonicus). Aquaculture, 563. https://doi.org/10.1016/j.aquaculture.2022.738790</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Hachemi, A. &amp; Zeroual, A. (2022). Computer-assisted program for water calco-carbonic equilibrium computation. Earth Science Informatics, 15(1), 68-704. https://doi.org/10.1007/s12145-021-00703-5</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Li, W., Pu, H., &amp; Wang, R. (2021). Sign language recognition based on computer vision. In: Proceedings of 2021 IEEE International Conference on Artificial Intelligence and Computer Applications, ICAICA 2021. pp. 919-922. https://doi.org/10.1109/ICAICA52286.2021.9498024</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Schmitt, X., Kubler, S., Robert, J., Papadakis, M. &amp; Letraon, Y. (2019). A replicable comparison study of NER software: StanfordNLP, NLTK, OpenNLP, SpaCy, gate. In: Proceedings of 2019 6th International Conference on Social Networks Analysis, Management and Security, SNAMS 2019. pp. 338-343. https://doi.org/10.1109/SNAMS.2019.8931850</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Ajani, D.T. (2019). Grammatico-Semantic Content of Primitives in the Major Themes of News Watch’s Reports on Nigerian Politics. The international journal of humanities &amp; social studies, 7(12), 327-337. 
https://doi.org/10.24940/theijhss/2019/v7/i12/HS1912-066</mixed-citation></ref><ref id="B19"><label>19.</label><citation-alternatives><mixed-citation xml:lang="en">Kraeva, I.A. et al. (2022). Germanistika i lingvodidaktika v Moskovskom i Minskom gosudarstvennykh lingvisticheskikh universitetakh: Istoki, razvitie, perspektivy. (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Краева И.А. Германистика и лингводидактика в Московском и Минском государственных лингвистических университетах: истоки, развитие, перспективы. Казань: Бук, 2022.</mixed-citation></citation-alternatives></ref><ref id="B20"><label>20.</label><citation-alternatives><mixed-citation xml:lang="en">Potapova, R.K. (2012). Diskursivnaya sostavlyayushchaya sovremennoi korpusnoi lingvistiki (primenitel’no k ustno-rechevym bazam dannykh). Bulletin of Moscow State Linguistic University, 639, 157-167. (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Потапова Р.К. Дискурсивная составляющая современной корпусной лингвистики (применительно к устно-речевым базам данных) // Вестник Московского государственного лингвистического университета. 2012. № 639. С. 157-167.</mixed-citation></citation-alternatives></ref><ref id="B21"><label>21.</label><citation-alternatives><mixed-citation xml:lang="en">Zubov, A.V. (2006). Korpusnaya lingvistika: vozmozhnosti i perspektivy. In: Proceedings of Conference “Russkii yazyk: Sistema i funktsionirovanie”, Minsk. pp. 22-27. (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Зубов А.В. Корпусная лингвистика: возможности и перспективы // Русский язык: система и функционирование. Минск: РИВШ, 2006. С. 22-27.</mixed-citation></citation-alternatives></ref><ref id="B22"><label>22.</label><mixed-citation>Kim, C., Choi, S., Jeong, J. &amp; Lee, E. (2022). Automatic risks detection and comparison techniques for general conditions of technical documents in purchasing order. In: Proceedings of ACM International Conference Proceeding Series. pp. 236-241. 
https://doi.org/10.1145/3543712.3543721</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Fantechi, A., Gnesi, S., Livi, S. &amp; Semini, L. (2021). A spaCy-based tool for extracting variability from NL requirements. In: Proceedings of ACM International Conference Proceeding Series, Part F171625-B. pp. 32-35. https://doi.org/10.1145/3461002.3473074</mixed-citation></ref><ref id="B24"><label>24.</label><mixed-citation>Eyre, H., Chapman, A.B., Peterson, K.S., Shi, J., Alba, P.R., Jones, M.M. &amp; Patterson, O.V. (2021). Launching into clinical space with medspaCy: A new clinical text processing toolkit in Python. In: Proceedings AMIA … Annual Symposium Proceedings. AMIA Symposium, 2021. pp. 438-447.</mixed-citation></ref><ref id="B25"><label>25.</label><mixed-citation>Partalidou, E., Spyromitros-Xioufis, E., Doropoulos, S., Vologiannidis, S. &amp; Diamantaras, K.I. (2019). Design and implementation of an open source Greek POS tagger and entity recognizer using spaCy. In: Proceedings 2019 IEEE/WIC/ACM International Conference on Web Intelligence, WI 2019. pp. 337-341. https://doi.org/10.1145/3350546.3352543</mixed-citation></ref><ref id="B26"><label>26.</label><mixed-citation>Jugran, S., Kumar, A., Tyagi, B.S. &amp; Anand, V. (2021). Extractive automatic text summarization using SpaCy in Python NLP. In: Proceedings of 2021 International Conference on Advance Computing and Innovative Technologies in Engineering, ICACITE 2021. pp. 582-585. https://doi.org/10.1109/ICACITE51222.2021.9404712</mixed-citation></ref><ref id="B27"><label>27.</label><mixed-citation>Channabasamma, Suresh, Y. &amp; Manusha Reddy, A. (2021). A contextual model for information extraction in resume analytics using NLP’s spaCy. Inventive computation and information technologies. Springer. pp. 395-404. https://doi.org/10.1007/978-981-33-4305-4_30</mixed-citation></ref><ref id="B28"><label>28.</label><mixed-citation>Harahus, M., Juhar, J. &amp; Hladek, D. (2022). 
Morphological annotation of the Slovak language in the spaCy library with the pretraining. In: Proceedings of 32nd International Conference Radioelektronika, Radioelektronika 2022. https://doi.org/10.1109/RADIOELEKTRONIKA54537.2022.9764935</mixed-citation></ref><ref id="B29"><label>29.</label><mixed-citation>Kumar, D., Choudhari, K., Patel, P., Pandey, S., Hajare, A. &amp; Jante, S. (2022). STAT simple text annotation tool (STAT): Web-based tool for creating training data for spaCy models. In: ICT Analysis and Applications. Singapore: Springer Nature. https://doi.org/10.1007/978-981-16-5655-2_29</mixed-citation></ref><ref id="B30"><label>30.</label><mixed-citation>Soni, P.K. &amp; Rambola, R. (2021). Deep learning, WordNet, and spaCy based hybrid method for detection of implicit aspects for sentiment analysis. In: Proceedings of 2021 International Conference on Intelligent Technologies, CONIT 2021. https://doi.org/10.1109/CONIT51480.2021.9498372</mixed-citation></ref><ref id="B31"><label>31.</label><mixed-citation>Chantrapornchai, C. &amp; Tunsakul, A. (2021). Information extraction on tourism domain using spaCy and BERT. ECTI Transactions on Computer and Information Technology, 15(1), 108-122. https://doi.org/10.37936/ecti-cit.2021151.228621</mixed-citation></ref><ref id="B32"><label>32.</label><mixed-citation>Singh, N. &amp; Hussain, A. (2022). Rapid application development in cloud computing with IoT. In: IoT and AI technologies for sustainable living: A practical handbook. pp. 1-28. https://doi.org/10.1201/9781003051022-1</mixed-citation></ref><ref id="B33"><label>33.</label><citation-alternatives><mixed-citation xml:lang="en">Gorozhanov, A.I., Guseynova, I.A. &amp; Stepanova, D.V. (2022). Instrumentarii avtomatizirovannogo analiza perevoda khudozhestvennogo proizvedeniya. Issues of Applied Linguistics, 45, 62-89. https://doi.org/10.25076/vpl.45.03 (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Горожанов А.И., Гусейнова И.А., Степанова Д.В. 
Инструментарий автоматизированного анализа перевода художественного произведения // Вопросы прикладной лингвистики. М.: Национальное объединение преподавателей иностранных языков делового и профессионального общения в сфере бизнеса, 2022. № 45. С. 62-89. https://doi.org/10.25076/vpl.45.03</mixed-citation></citation-alternatives></ref><ref id="B34"><label>34.</label><citation-alternatives><mixed-citation xml:lang="en">Gorozhanov, A.I. (2021). Metod komparativnogo analiza gruppy tekstov (na materiale nemetskoyazychnykh nauchnykh statei). Bulletin of Moscow State Linguistic University, 5(847), 48-59. https://doi.org/10.52070/2542-2197_2021_5_847_48 (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Горожанов А.И. Метод компаративного анализа группы текстов (на материале немецкоязычных научных статей) // Вестник Московского государственного лингвистического университета. Гуманитарные науки. 2021. № 5(847). С. 48-59. https://doi.org/10.52070/2542-2197_2021_5_847_48</mixed-citation></citation-alternatives></ref><ref id="B35"><label>35.</label><mixed-citation>Singh, N., Kumar, M., Singh, B. &amp; Singh, J. (2022). DeepSpacy-NER: An efficient deep learning model for named entity recognition for Punjabi language. Evolving Systems, 14, 673-683. https://doi.org/10.1007/s12530-022-09453-1</mixed-citation></ref></ref-list></back></article>
