<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Discrete and Continuous Models and Applied Computational Science</journal-id><journal-title-group><journal-title xml:lang="en">Discrete and Continuous Models and Applied Computational Science</journal-title><trans-title-group xml:lang="ru"><trans-title>Discrete and Continuous Models and Applied Computational Science</trans-title></trans-title-group></journal-title-group><issn publication-format="print">2658-4670</issn><issn publication-format="electronic">2658-7149</issn><publisher><publisher-name xml:lang="en">Peoples' Friendship University of Russia named after Patrice Lumumba (RUDN University)</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">41391</article-id><article-id pub-id-type="doi">10.22363/2658-4670-2024-32-2-234-241</article-id><article-id pub-id-type="edn">CUCXTY</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Articles</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Статьи</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Developing a computer system for student learning based on vision-language models</article-title><trans-title-group xml:lang="ru"><trans-title>Разработка компьютерной системы обучения студентов на основе визуально-лингвистических моделей</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3651-7629</contrib-id><contrib-id 
contrib-id-type="scopus">16408533100</contrib-id><contrib-id contrib-id-type="researcherid">O-8287-2017</contrib-id><name-alternatives><name xml:lang="en"><surname>Shchetinin</surname><given-names>Eugeny Yu.</given-names></name><name xml:lang="ru"><surname>Щетинин</surname><given-names>Е. Ю.</given-names></name></name-alternatives><bio xml:lang="en"><p>Doctor of Physical and Mathematical Sciences, lecturer of Department of Mathematics</p></bio><email>riviera-molto@mail.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-8285-0847</contrib-id><contrib-id contrib-id-type="scopus">57485591900</contrib-id><name-alternatives><name xml:lang="en"><surname>Glushkova</surname><given-names>Anastasia G.</given-names></name><name xml:lang="ru"><surname>Глушкова</surname><given-names>А. Г.</given-names></name></name-alternatives><bio xml:lang="en"><p>researcher</p></bio><email>aglushkova@endeavorco.com</email><xref ref-type="aff" rid="aff2"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1000-9650</contrib-id><name-alternatives><name xml:lang="en"><surname>Demidova</surname><given-names>Anastasia V.</given-names></name><name xml:lang="ru"><surname>Демидова</surname><given-names>А. 
В.</given-names></name></name-alternatives><bio xml:lang="en"><p>Candidate of Physical and Mathematical Sciences, Assistant professor of Department of Probability Theory and Cyber Security</p></bio><email>demidova-av@rudn.ru</email><xref ref-type="aff" rid="aff3"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">Financial University under the Government of the Russian Federation</institution></aff><aff><institution xml:lang="ru">Финансовый университет при Правительстве Российской Федерации</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Endeavor</institution></aff><aff><institution xml:lang="ru">Эндевор</institution></aff></aff-alternatives><aff-alternatives id="aff3"><aff><institution xml:lang="en">RUDN University</institution></aff><aff><institution xml:lang="ru">Российский университет дружбы народов</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2024-10-15" publication-format="electronic"><day>15</day><month>10</month><year>2024</year></pub-date><volume>32</volume><issue>2</issue><issue-title xml:lang="en">VOL 32, NO2 (2024)</issue-title><issue-title xml:lang="ru">ТОМ 32, №2 (2024)</issue-title><fpage>234</fpage><lpage>241</lpage><history><date date-type="received" iso-8601-date="2024-11-01"><day>01</day><month>11</month><year>2024</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2024, Shchetinin E.Y., Glushkova A.G., Demidova A.V.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2024, Щетинин Е.Ю., Глушкова А.Г., Демидова А.В.</copyright-statement><copyright-year>2024</copyright-year><copyright-holder xml:lang="en">Shchetinin E.Y., Glushkova A.G., Demidova A.V.</copyright-holder><copyright-holder xml:lang="ru">Щетинин Е.Ю., Глушкова А.Г., Демидова А.В.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref 
xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://creativecommons.org/licenses/by-nc/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.rudn.ru/miph/article/view/41391">https://journals.rudn.ru/miph/article/view/41391</self-uri><abstract xml:lang="en"><p style="text-align: justify;">In recent years, artificial intelligence methods have been developed in various fields, particularly in education. The development of computer systems for student learning is an important task and can significantly improve student learning. The development and implementation of deep learning methods in the educational process has gained immense popularity. The most successful among them are models that consider the multimodal nature of information, in particular the combination of text, sound, images, and video. The difficulty in processing such data is that combining multimodal input data by different channel concatenation methods that ignore the heterogeneity of different modalities is an inefficient approach. To solve this problem, an inter-channel attention module is proposed in this paper. The paper presents a computer vision-linguistic system of student learning process based on the concatenation of multimodal input data using the inter-channel attention module. It is shown that the creation of effective and flexible learning systems and technologies based on such models allows to adapt the educational process to the individual needs of students and increase its efficiency.</p></abstract><trans-abstract xml:lang="ru"><p style="text-align: justify;">В последние годы методы искусственного интеллекта получили большое развитие в различных областях, в частности в образовании. Разработка компьютерных систем для обучения студентов является важной задачей и может значительно улучшить процесс обучения студентов. Разработка и внедрение методов глубокого обучения в образовательный процесс приобрели огромную популярность. 
Наиболее успешными среди них являются модели, учитывающие мультимодальный характер информации, в частности сочетание текста, звука, изображений и видео. Сложность обработки таких данных состоит в том, что объединение мультимодальных входных данных различными методами конкатенации каналов, игнорирующих неоднородность разных модальностей, является неэффективным подходом. Для решения этой проблемы в работе предложен междуканальный модуль внимания. В статье представлена компьютерная визуально-лингвистическая система процесса обучения студентов, основанная на объединении мультимодальных входных данных с использованием междуканального модуля внимания. Показано, что создание эффективных и гибких систем и технологий обучения на основе таких моделей позволяет адаптировать образовательный процесс к индивидуальным потребностям обучающихся и повысить его эффективность.</p></trans-abstract><kwd-group xml:lang="en"><kwd>deep learning</kwd><kwd>vision-language learning model</kwd><kwd>neural networks-transformers</kwd><kwd>through-channel attention module</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>глубокое обучение</kwd><kwd>модель визуально-лингвистического обучения</kwd><kwd>нейронные сети-трансформеры</kwd><kwd>модуль сквозного внимания</kwd></kwd-group><funding-group/></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Devlin, J., Chang, M., Lee, K. &amp; Toutanova, K. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding 2018.</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, Ł. &amp; Polosukhin, I. Attention is All you Need in Advances in Neural Information Processing Systems (eds Guyon, I., Luxburg, U. V., Bengio, S., Wallach, H., Fergus, R., Vishwanathan, S. &amp; Garnett, R.) 30 (Curran Associates, Inc., 2017), 5998-6008.</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Liu Y. 
and Ott, M., Goyal, N. and Du, J., Joshi, M., Chen, D., Levy, O., Lewis, M. and Zettlemoyer, L. &amp; Stoyanov, V. RoBERTa: A Robustly Optimized BERT Pretraining Approach 2019.</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Clark, E. &amp; Gardner, M. Simple and Effective Multi-Paragraph Reading Comprehension 2018.</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Klein, G., Kim, Y., Deng, Y., Senellart, J. &amp; Rush, A. OpenNMT: Open-Source Toolkit for Neural Machine Translation in Proceedings of ACL 2017, System Demonstrations (eds Bansal, M. &amp; Ji, H.) 28 (Association for Computational Linguistics, Vancouver, Canada, July 2017), 67-72. doi:10.18653/V1/P17-4012.</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Radford, A., Narasimhan, K., Salimans, T. &amp; Sutskever, I. Improving language understanding by generative pre-training https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf.</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>Nogueira, R. &amp; Cho, K. Passage Re-ranking with BERT 2019.</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Schröder, S., Niekler, A. &amp; Potthast, M. Revisiting Uncertainty-based Query Strategies for Active Learning with Transformers 2021.</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Yang, F., Wang, X., Ma, H. &amp; Li, J. Transformers-sklearn: a toolkit for medical language understanding with transformer-based models. BMC Medical Informatics and Decision Making 21, 141-157. doi:10.1186/s12911-021-01459-0 (2021).</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Rashid, M., Höhne, J., Schmitz, G. &amp; Müller-Putz, G. A Review of Humanoid Robots Controlled by Brain-Computer Interfaces. Frontiers in Neurorobotics, 1-28 (2020).</mixed-citation></ref></ref-list></back></article>
