USER

given the vim plugin delimited by triple backticks:

```vim
" vim: ts=4 sts=4 expandtab

" colors (adjust to your liking)
highlight default llama_hl_hint guifg=#ff772f ctermfg=202
highlight default llama_hl_info guifg=#77ff2f ctermfg=119

" general parameters:
"
"   endpoint:         llama.cpp server endpoint
"   api_key:          llama.cpp server api key (optional)
"   n_prefix:         number of lines before the cursor location to include in the local prefix
"   n_suffix:         number of lines after the cursor location to include in the local suffix
"   n_predict:        max number of tokens to predict
"   stop_strings:     return the result immediately as soon as any of these strings are encountered in the generated text
"   t_max_prompt_ms:  max allotted time for the prompt processing (TODO: not yet supported)
"   t_max_predict_ms: max allotted time for the prediction
"   show_info:        show extra info about the inference (0 - disabled, 1 - statusline, 2 - inline)
"   auto_fim:         trigger FIM completion automatically on cursor movement
"   max_line_suffix:  do not auto-trigger FIM completion if there are more than this number of characters to the right of the cursor
"   max_cache_keys:   max number of cached completions to keep in result_cache
"
" ring buffer of chunks, accumulated with time upon:
"
"   - completion request
"   - yank
"   - entering a buffer
"   - leaving a buffer
"   - writing a file
"
" parameters for the ring-buffer with extra context:
"
"   ring_n_chunks:   max number of chunks to pass as extra context to the server (0 to disable)
"   ring_chunk_size: max size of the chunks (in number of lines)
"                    note: adjust these numbers so that you don't overrun your context
"                    at ring_n_chunks = 64 and ring_chunk_size = 64 you need ~32k context
"   ring_scope:      the range around the cursor position (in number of lines) for gathering chunks after FIM
"   ring_update_ms:  how often to process queued chunks in normal mode
"
" keymaps parameters:
"
"   keymap_trigger:     keymap to trigger the completion, default: <C-F>
"   keymap_accept_full: keymap to accept full suggestion, default: <Tab>
"   keymap_accept_line: keymap to accept line suggestion, default: <S-Tab>
"   keymap_accept_word: keymap to accept word suggestion, default: <C-B>
"
let s:default_config = {
    \ 'endpoint':           'http://127.0.0.1:8012/infill',
    \ 'api_key':            '',
    \ 'n_prefix':           256,
    \ 'n_suffix':           64,
    \ 'n_predict':          128,
    \ 'stop_strings':       [],
    \ 't_max_prompt_ms':    500,
    \ 't_max_predict_ms':   1000,
    \ 'show_info':          2,
    \ 'auto_fim':           v:true,
    \ 'max_line_suffix':    8,
    \ 'max_cache_keys':     250,
    \ 'ring_n_chunks':      16,
    \ 'ring_chunk_size':    64,
    \ 'ring_scope':         1024,
    \ 'ring_update_ms':     1000,
    \ 'keymap_trigger':     "<C-F>",
    \ 'keymap_accept_full': "<Tab>",
    \ 'keymap_accept_line': "<S-Tab>",
    \ 'keymap_accept_word': "<C-B>",
    \ }

let llama_config = get(g:, 'llama_config', s:default_config)
let g:llama_config = extendnew(s:default_config, llama_config, 'force')

let s:llama_enabled = v:true

" contains cached responses from the server
" used to avoid re-computing the same completions and to also create new completions with similar context
" ref: https://github.com/ggml-org/llama.vim/pull/18
let g:cache_data = {}

" TODO: Currently the cache uses a random eviction policy. A more clever policy could be implemented (e.g. LRU).
function! s:cache_insert(key, value)
    if len(keys(g:cache_data)) > (g:llama_config.max_cache_keys - 1)
        let l:keys = keys(g:cache_data)
        let l:hash = l:keys[rand() % len(l:keys)]
        call remove(g:cache_data, l:hash)
    endif
    let g:cache_data[a:key] = a:value
endfunction

" get the number of leading spaces of a string
function! s:get_indent(str)
    let l:count = 0
    for i in range(len(a:str))
        if a:str[i] == "\t"
            let l:count += &tabstop - 1
        else
            break
        endif
    endfor
    return l:count
endfunction

function! s:rand(i0, i1) abort
    return a:i0 + rand() % (a:i1 - a:i0 + 1)
endfunction

function! llama#disable()
    call llama#fim_hide()
    autocmd! llama
    exe "silent! iunmap " .. g:llama_config.keymap_trigger
endfunction

function! llama#toggle()
    if s:llama_enabled
        call llama#disable()
    else
        call llama#init()
    endif
    let s:llama_enabled = !s:llama_enabled
endfunction

function llama#setup_commands()
    command! LlamaEnable  call llama#init()
    command! LlamaDisable call llama#disable()
    command! LlamaToggle  call llama#toggle()
endfunction

function! llama#init()
    if !executable('curl')
        echohl WarningMsg
        echo 'llama.vim requires the "curl" command to be available'
        echohl None
        return
    endif

    call llama#setup_commands()

    let s:fim_data = {}

    let s:ring_chunks = [] " current set of chunks used as extra context
    let s:ring_queued = [] " chunks that are queued to be sent for processing
    let s:ring_n_evict = 0

    let s:hint_shown = v:false
    let s:pos_y_pick = -9999 " last y where we picked a chunk
    let s:indent_last = -1 " last indentation level that was accepted (TODO: this might be buggy)

    let s:timer_fim = -1

    let s:t_last_move = reltime() " last time the cursor moved

    let s:current_job = v:null

    let s:ghost_text_nvim = exists('*nvim_buf_get_mark')
    let s:ghost_text_vim = has('textprop')

    if s:ghost_text_vim
        if version < 901
            echom 'Warning: llama.vim requires version 901 or greater. Current version: ' . version
        endif

        let s:hlgroup_hint = 'llama_hl_hint'
        let s:hlgroup_info = 'llama_hl_info'

        if empty(prop_type_get(s:hlgroup_hint))
            call prop_type_add(s:hlgroup_hint, {'highlight': s:hlgroup_hint})
        endif
        if empty(prop_type_get(s:hlgroup_info))
            call prop_type_add(s:hlgroup_info, {'highlight': s:hlgroup_info})
        endif
    endif

    augroup llama
        autocmd!
        exe "autocmd InsertEnter * inoremap <expr> <silent> " .. g:llama_config.keymap_trigger .. " llama#fim_inline(v:false, v:false)"

        autocmd InsertLeavePre  * call llama#fim_hide()
        autocmd CursorMoved     * call s:on_move()
        autocmd CursorMovedI    * call s:on_move()
        autocmd CompleteChanged * call llama#fim_hide()
        autocmd CompleteDone    * call s:on_move()

        if g:llama_config.auto_fim
            autocmd CursorMovedI * call llama#fim(-1, -1, v:true, [], v:true)
        endif

        " gather chunks upon yanking
        autocmd TextYankPost * if v:event.operator ==# 'y' | call s:pick_chunk(v:event.regcontents, v:false, v:true) | endif

        " gather chunks upon entering/leaving a buffer
        autocmd BufEnter * call timer_start(100, {-> s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true, v:true)})
        autocmd BufLeave * call s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true, v:true)

        " gather chunk upon saving the file
        autocmd BufWritePost * call s:pick_chunk(getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')])), v:true, v:true)
    augroup END

    silent! call llama#fim_hide()

    " init background update of the ring buffer
    if g:llama_config.ring_n_chunks > 0
        call s:ring_update()
    endif
endfunction

" compute how similar two chunks of text are
" 0 - no similarity, 1 - high similarity
" TODO: figure out something better
function! s:chunk_sim(c0, c1)
    let l:lines0 = len(a:c0)
    let l:lines1 = len(a:c1)

    let l:common = 0

    for l:line0 in a:c0
        for l:line1 in a:c1
            if l:line0 == l:line1
                let l:common += 1
                break
            endif
        endfor
    endfor

    return 2.0 * l:common / (l:lines0 + l:lines1)
endfunction

" pick a random chunk of size g:llama_config.ring_chunk_size from the provided text and queue it for processing
"
"   no_mod   - do not pick chunks from buffers with pending changes
"   do_evict - evict chunks that are very similar to the new one
"
function! s:pick_chunk(text, no_mod, do_evict)
    " do not pick chunks from buffers with pending changes or buffers that are not files
    if a:no_mod && (getbufvar(bufnr('%'), '&modified') || !buflisted(bufnr('%')) || !filereadable(expand('%')))
        return
    endif

    " if the extra context option is disabled - do nothing
    if g:llama_config.ring_n_chunks <= 0
        return
    endif

    " don't pick very small chunks
    if len(a:text) < 3
        return
    endif

    if len(a:text) + 1 < g:llama_config.ring_chunk_size
        let l:chunk = a:text
    else
        let l:l0 = s:rand(0, max([0, len(a:text) - g:llama_config.ring_chunk_size/2]))
        let l:l1 = min([l:l0 + g:llama_config.ring_chunk_size/2, len(a:text)])

        let l:chunk = a:text[l:l0:l:l1]
    endif

    let l:chunk_str = join(l:chunk, "\n") . "\n"

    " check if this chunk is already added
    let l:exist = v:false

    for i in range(len(s:ring_chunks))
        if s:ring_chunks[i].data == l:chunk
            let l:exist = v:true
            break
        endif
    endfor

    for i in range(len(s:ring_queued))
        if s:ring_queued[i].data == l:chunk
            let l:exist = v:true
            break
        endif
    endfor

    if l:exist
        return
    endif

    " evict queued chunks that are very similar to the new one
    for i in range(len(s:ring_queued) - 1, 0, -1)
        if s:chunk_sim(s:ring_queued[i].data, l:chunk) > 0.9
            if a:do_evict
                call remove(s:ring_queued, i)
                let s:ring_n_evict += 1
            else
                return
            endif
        endif
    endfor

    " also from s:ring_chunks
    for i in range(len(s:ring_chunks) - 1, 0, -1)
        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.9
            if a:do_evict
                call remove(s:ring_chunks, i)
                let s:ring_n_evict += 1
            else
                return
            endif
        endif
    endfor

    " TODO: become parameter ?
    if len(s:ring_queued) == 16
        call remove(s:ring_queued, 0)
    endif

    call add(s:ring_queued, {'data': l:chunk, 'str': l:chunk_str, 'time': reltime(), 'filename': expand('%')})

    "let &statusline = 'extra context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued)
endfunction

" picks a queued chunk, sends it for processing and adds it to s:ring_chunks
" called every g:llama_config.ring_update_ms
function! s:ring_update()
    call timer_start(g:llama_config.ring_update_ms, {-> s:ring_update()})

    " update only if in normal mode or if the cursor hasn't moved for a while
    if mode() !=# 'n' && reltimefloat(reltime(s:t_last_move)) < 3.0
        return
    endif

    if len(s:ring_queued) == 0
        return
    endif

    " move the first queued chunk to the ring buffer
    if len(s:ring_chunks) == g:llama_config.ring_n_chunks
        call remove(s:ring_chunks, 0)
    endif

    call add(s:ring_chunks, remove(s:ring_queued, 0))

    "let &statusline = 'updated context: ' . len(s:ring_chunks) . ' / ' . len(s:ring_queued)

    " send asynchronous job with the new extra context so that it is ready for the next FIM
    let l:extra_context = []
    for l:chunk in s:ring_chunks
        call add(l:extra_context, {
            \ 'text':     l:chunk.str,
            \ 'time':     l:chunk.time,
            \ 'filename': l:chunk.filename
            \ })
    endfor

    " no samplers needed here
    let l:request = json_encode({
        \ 'input_prefix':     "",
        \ 'input_suffix':     "",
        \ 'input_extra':      l:extra_context,
        \ 'prompt':           "",
        \ 'n_predict':        0,
        \ 'temperature':      0.0,
        \ 'stream':           v:false,
        \ 'samplers':         [],
        \ 'cache_prompt':     v:true,
        \ 't_max_prompt_ms':  1,
        \ 't_max_predict_ms': 1,
        \ 'response_fields':  [""]
        \ })

    let l:curl_command = [
        \ "curl",
        \ "--silent",
        \ "--no-buffer",
        \ "--request", "POST",
        \ "--url", g:llama_config.endpoint,
        \ "--header", "Content-Type: application/json",
        \ "--data", "@-",
        \ ]

    if exists("g:llama_config.api_key") && len(g:llama_config.api_key) > 0
        call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key])
    endif

    " no callbacks because we don't need to process the response
    if s:ghost_text_nvim
        let jobid = jobstart(l:curl_command, {})
        call chansend(jobid, l:request)
        call chanclose(jobid, 'stdin')
    elseif s:ghost_text_vim
        let jobid = job_start(l:curl_command, {})
        let channel = job_getchannel(jobid)
        call ch_sendraw(channel, l:request)
        call ch_close_in(channel)
    endif
endfunction

" get the local context at a specified position
" a:prev can optionally contain a previous completion for this position
" in such cases, create the local context as if the completion was already inserted
function! s:fim_ctx_local(pos_x, pos_y, prev)
    let l:max_y = line('$')

    if empty(a:prev)
        let l:line_cur = getline(a:pos_y)

        let l:line_cur_prefix = strpart(l:line_cur, 0, a:pos_x)
        let l:line_cur_suffix = strpart(l:line_cur, a:pos_x)

        let l:lines_prefix = getline(max([1, a:pos_y - g:llama_config.n_prefix]), a:pos_y - 1)
        let l:lines_suffix = getline(a:pos_y + 1, min([l:max_y, a:pos_y + g:llama_config.n_suffix]))

        " special handling of lines full of whitespaces - start from the beginning of the line
        if match(l:line_cur, '^\s*$') >= 0
            let l:indent = 0

            let l:line_cur_prefix = ""
            let l:line_cur_suffix = ""
        else
            " the indentation of the current line
            let l:indent = strlen(matchstr(l:line_cur, '^\s*'))
        endif
    else
        if len(a:prev) == 1
            let l:line_cur = getline(a:pos_y) . a:prev[0]
        else
            let l:line_cur = a:prev[-1]
        endif

        let l:line_cur_prefix = l:line_cur
        let l:line_cur_suffix = ""

        let l:lines_prefix = getline(max([1, a:pos_y - g:llama_config.n_prefix + len(a:prev) - 1]), a:pos_y - 1)
        if len(a:prev) > 1
            call add(l:lines_prefix, getline(a:pos_y) . a:prev[0])
            for l:line in a:prev[1:-2]
                call add(l:lines_prefix, l:line)
            endfor
        endif

        let l:lines_suffix = getline(a:pos_y + 1, min([l:max_y, a:pos_y + g:llama_config.n_suffix]))

        let l:indent = s:indent_last
    endif

    let l:prefix = ""
        \ . join(l:lines_prefix, "\n")
        \ . "\n"

    let l:middle = ""
        \ . l:line_cur_prefix

    let l:suffix = ""
        \ . l:line_cur_suffix
        \ . "\n"
        \ . join(l:lines_suffix, "\n")
        \ . "\n"

    let l:res = {}

    let l:res['prefix'] = l:prefix
    let l:res['middle'] = l:middle
    let l:res['suffix'] = l:suffix
    let l:res['indent'] = l:indent

    let l:res['line_cur'] = l:line_cur

    let l:res['line_cur_prefix'] = l:line_cur_prefix
    let l:res['line_cur_suffix'] = l:line_cur_suffix

    return l:res
endfunction

" necessary for 'inoremap <expr>'
function! llama#fim_inline(is_auto, use_cache) abort
    " we already have a suggestion displayed - hide it
    if s:hint_shown && !a:is_auto
        call llama#fim_hide()
        return ''
    endif

    call llama#fim(-1, -1, a:is_auto, [], a:use_cache)

    return ''
endfunction

" the main FIM call
" takes local context around the cursor and sends it together with the extra context to the server for completion
function! llama#fim(pos_x, pos_y, is_auto, prev, use_cache) abort
    let l:pos_x = a:pos_x
    let l:pos_y = a:pos_y

    if l:pos_x < 0
        let l:pos_x = col('.') - 1
    endif
    if l:pos_y < 0
        let l:pos_y = line('.')
    endif

    " avoid sending repeated requests too fast
    if s:current_job != v:null
        if s:timer_fim != -1
            call timer_stop(s:timer_fim)
            let s:timer_fim = -1
        endif

        let s:timer_fim = timer_start(100, {-> llama#fim(a:pos_x, a:pos_y, v:true, a:prev, a:use_cache)})
        return
    endif

    "if s:hint_shown && empty(a:prev)
    "    return
    "endif

    "let s:t_fim_start = reltime()

    let l:ctx_local = s:fim_ctx_local(l:pos_x, l:pos_y, a:prev)

    let l:prefix = l:ctx_local['prefix']
    let l:middle = l:ctx_local['middle']
    let l:suffix = l:ctx_local['suffix']
    let l:indent = l:ctx_local['indent']

    if a:is_auto && len(l:ctx_local['line_cur_suffix']) > g:llama_config.max_line_suffix
        return
    endif

    let l:t_max_predict_ms = g:llama_config.t_max_predict_ms
    if empty(a:prev)
        " the first request is quick - we will launch a speculative request after this one is displayed
        let l:t_max_predict_ms = 250
    endif

    " compute multiple hashes that can be used to generate a completion for which the
    " first few lines are missing. this happens when we have scrolled down a bit from where the original
    " generation was done
    "
    let l:hashes = []

    call add(l:hashes, sha256(l:prefix . l:middle . 'Î' . l:suffix))

    let l:prefix_trim = l:prefix
    for i in range(3)
        let l:prefix_trim = substitute(l:prefix_trim, '^[^\n]*\n', '', '')
        if empty(l:prefix_trim)
            break
        endif

        call add(l:hashes, sha256(l:prefix_trim . l:middle . 'Î' . l:suffix))
    endfor

    " if we already have a cached completion for one of the hashes, don't send a request
    if a:use_cache
        for l:hash in l:hashes
            if get(g:cache_data, l:hash, v:null) != v:null
                return
            endif
        endfor
    endif

    " TODO: this might be incorrect
    let s:indent_last = l:indent

    " TODO: refactor in a function
    let l:text = getline(max([1, line('.') - g:llama_config.ring_chunk_size/2]), min([line('.') + g:llama_config.ring_chunk_size/2, line('$')]))

    let l:l0 = s:rand(0, max([0, len(l:text) - g:llama_config.ring_chunk_size/2]))
    let l:l1 = min([l:l0 + g:llama_config.ring_chunk_size/2, len(l:text)])

    let l:chunk = l:text[l:l0:l:l1]

    " evict chunks that are very similar to the current context
    " this is needed because such chunks usually distort the completion to repeat what was already there
    for i in range(len(s:ring_chunks) - 1, 0, -1)
        if s:chunk_sim(s:ring_chunks[i].data, l:chunk) > 0.5
            call remove(s:ring_chunks, i)
            let s:ring_n_evict += 1
        endif
    endfor

    " prepare the extra context data
    let l:extra_ctx = []
    for l:chunk in s:ring_chunks
        call add(l:extra_ctx, {
            \ 'text':     l:chunk.str,
            \ 'time':     l:chunk.time,
            \ 'filename': l:chunk.filename
            \ })
    endfor

    let l:request = json_encode({
        \ 'input_prefix':     l:prefix,
        \ 'input_suffix':     l:suffix,
        \ 'input_extra':      l:extra_ctx,
        \ 'prompt':           l:middle,
        \ 'n_predict':        g:llama_config.n_predict,
        \ 'stop':             g:llama_config.stop_strings,
        \ 'n_indent':         l:indent,
        \ 'top_k':            40,
        \ 'top_p':            0.90,
        \ 'stream':           v:false,
        \ 'samplers':         ["top_k", "top_p", "infill"],
        \ 'cache_prompt':     v:true,
        \ 't_max_prompt_ms':  g:llama_config.t_max_prompt_ms,
        \ 't_max_predict_ms': l:t_max_predict_ms,
        \ 'response_fields':  [
        \     "content",
        \     "timings/prompt_n",
        \     "timings/prompt_ms",
        \     "timings/prompt_per_token_ms",
        \     "timings/prompt_per_second",
        \     "timings/predicted_n",
        \     "timings/predicted_ms",
        \     "timings/predicted_per_token_ms",
        \     "timings/predicted_per_second",
        \     "truncated",
        \     "tokens_cached",
        \ ],
        \ })

    let l:curl_command = [
        \ "curl",
        \ "--silent",
        \ "--no-buffer",
        \ "--request", "POST",
        \ "--url", g:llama_config.endpoint,
        \ "--header", "Content-Type: application/json",
        \ "--data", "@-",
        \ ]

    if exists("g:llama_config.api_key") && len(g:llama_config.api_key) > 0
        call extend(l:curl_command, ['--header', 'Authorization: Bearer ' .. g:llama_config.api_key])
    endif

    if s:current_job != v:null
        if s:ghost_text_nvim
            call jobstop(s:current_job)
        elseif s:ghost_text_vim
            call job_stop(s:current_job)
        endif
    endif

    " send the request asynchronously
    if s:ghost_text_nvim
        let s:current_job = jobstart(l:curl_command, {
            \ 'on_stdout': function('s:fim_on_response', [l:hashes]),
            \ 'on_exit':   function('s:fim_on_exit'),
            \ 'stdout_buffered': v:true
            \ })

        call chansend(s:current_job, l:request)
        call chanclose(s:current_job, 'stdin')
    elseif s:ghost_text_vim
        let s:current_job = job_start(l:curl_command, {
            \ 'out_cb':  function('s:fim_on_response', [l:hashes]),
            \ 'exit_cb': function('s:fim_on_exit')
            \ })

        let channel = job_getchannel(s:current_job)
        call ch_sendraw(channel, l:request)
        call ch_close_in(channel)
    endif

    " TODO: per-file location
    let l:delta_y = abs(l:pos_y - s:pos_y_pick)

    " gather some extra context nearby and process it in the background
    " only gather chunks if the cursor has moved a lot
    " TODO: something more clever? reranking?
    if a:is_auto && l:delta_y > 32
        let l:max_y = line('$')

        " expand the prefix even further
        call s:pick_chunk(getline(max([1, l:pos_y - g:llama_config.ring_scope]), max([1, l:pos_y - g:llama_config.n_prefix])), v:false, v:false)

        " pick a suffix chunk
        call s:pick_chunk(getline(min([l:max_y, l:pos_y + g:llama_config.n_suffix]), min([l:max_y, l:pos_y + g:llama_config.n_suffix + g:llama_config.ring_chunk_size])), v:false, v:false)

        let s:pos_y_pick = l:pos_y
    endif
endfunction

" callback that processes the FIM result from the server
function! s:fim_on_response(hashes, job_id, data, event = v:null)
    if s:ghost_text_nvim
        let l:raw = join(a:data, "\n")
    elseif s:ghost_text_vim
        let l:raw = a:data
    endif

    " ignore empty results
    if len(l:raw) == 0
        return
    endif

    " ensure the response is valid JSON, starting with a fast check before full decode
    if l:raw !~# '^\s*{' || l:raw !~# '\v"content"\s*:"'
        return
    endif

    try
        let l:response = json_decode(l:raw)
    catch
        return
    endtry

    " put the response in the cache
    for l:hash in a:hashes
        call s:cache_insert(l:hash, l:raw)
    endfor

    " if nothing is currently displayed - show the hint directly
    if !s:hint_shown || !s:fim_data['can_accept']
        let l:pos_x = col('.') - 1
        let l:pos_y = line('.')

        call s:fim_try_hint(l:pos_x, l:pos_y)
    endif
endfunction

function! s:fim_on_exit(job_id, exit_code, event = v:null)
    if a:exit_code != 0
        echom "Job failed with exit code: " . a:exit_code
    endif

    let s:current_job = v:null
endfunction

function! s:on_move()
    let s:t_last_move = reltime()

    call llama#fim_hide()

    let l:pos_x = col('.') - 1
    let l:pos_y = line('.')

    call s:fim_try_hint(l:pos_x, l:pos_y)
endfunction

" try to generate a suggestion using the data in the cache
function! s:fim_try_hint(pos_x, pos_y)
    " show the suggestion only in insert mode
    if mode() !~# '\v^(i|ic|ix)$'
        return
    endif

    let l:pos_x = a:pos_x
    let l:pos_y = a:pos_y

    let l:ctx_local = s:fim_ctx_local(l:pos_x, l:pos_y, [])

    let l:prefix = l:ctx_local['prefix']
    let l:middle = l:ctx_local['middle']
    let l:suffix = l:ctx_local['suffix']

    let l:hash = sha256(l:prefix . l:middle . 'Î' . l:suffix)

    " Check if the completion is cached
    let l:raw = get(g:cache_data, l:hash, v:null)

    " ... or if there is a cached completion nearby (10 characters behind)
    " Looks at the previous 10 characters to see if a completion is cached. If one is found at (x, y)
    " then it checks that the characters typed after (x, y) match up with the cached completion result.
    if l:raw == v:null
        let l:pm = l:prefix . l:middle

        let l:best = 0
        for i in range(128)
            let l:removed  = l:pm[-(1 + i):]
            let l:ctx_new  = l:pm[:-(2 + i)] . 'Î' . l:suffix
            let l:hash_new = sha256(l:ctx_new)

            if has_key(g:cache_data, l:hash_new)
                let l:response_cached = get(g:cache_data, l:hash_new)
                if l:response_cached == ""
                    continue
                endif

                let l:response = json_decode(l:response_cached)
                if l:response['content'][0:i] !=# l:removed
                    continue
                endif

                let l:response['content'] = l:response['content'][i + 1:]

                if len(l:response['content']) > 0
                    if l:raw == v:null
                        let l:raw = json_encode(l:response)
                    elseif len(l:response['content']) > l:best
                        let l:best = len(l:response['content'])
                        let l:raw  = json_encode(l:response)
                    endif
                endif
            endif
        endfor
    endif

    if l:raw != v:null
        call s:fim_render(l:pos_x, l:pos_y, l:raw)

        " run async speculative FIM in the background for this position
        if s:hint_shown
            call llama#fim(l:pos_x, l:pos_y, v:true, s:fim_data['content'], v:true)
        endif
    endif
endfunction

" render a suggestion at the current cursor location
function! s:fim_render(pos_x, pos_y, data)
    " do not show if there is a completion in progress
    if pumvisible()
        return
    endif

    let l:raw = a:data

    let l:can_accept = v:true
    let l:has_info   = v:false

    let l:n_prompt    = 0
    let l:t_prompt_ms = 1.0
    let l:s_prompt    = 0

    let l:n_predict    = 0
    let l:t_predict_ms = 1.0
    let l:s_predict    = 0

    let l:content = []

    " get the generated suggestion
    if l:can_accept
        let l:response = json_decode(l:raw)

        for l:part in split(get(l:response, 'content', ''), "\n", 1)
            call add(l:content, l:part)
        endfor

        " remove trailing new lines
        while len(l:content) > 0 && l:content[-1] == ""
            call remove(l:content, -1)
        endwhile

        let l:n_cached  = get(l:response, 'tokens_cached', 0)
        let l:truncated = get(l:response, 'timings/truncated', v:false)

        " if response.timings is available
        if has_key(l:response, 'timings/prompt_n') && has_key(l:response, 'timings/prompt_ms') && has_key(l:response, 'timings/prompt_per_second')
            \ && has_key(l:response, 'timings/predicted_n') && has_key(l:response, 'timings/predicted_ms') && has_key(l:response, 'timings/predicted_per_second')
            let l:n_prompt    = get(l:response, 'timings/prompt_n', 0)
            let l:t_prompt_ms = get(l:response, 'timings/prompt_ms', 1)
            let l:s_prompt    = get(l:response, 'timings/prompt_per_second', 0)

            let l:n_predict    = get(l:response, 'timings/predicted_n', 0)
            let l:t_predict_ms = get(l:response, 'timings/predicted_ms', 1)
            let l:s_predict    = get(l:response, 'timings/predicted_per_second', 0)
        endif

        let l:has_info = v:true
    endif

    if len(l:content) == 0
        call add(l:content, "")
        let l:can_accept = v:false
    endif

    let l:pos_x = a:pos_x
    let l:pos_y = a:pos_y

    let l:line_cur = getline(l:pos_y)

    " if the current line is full of whitespaces, trim as much whitespaces from the suggestion
    if match(l:line_cur, '^\s*$') >= 0
        let l:lead = min([strlen(matchstr(l:content[0], '^\s*')), strlen(l:line_cur)])

        let l:line_cur   = strpart(l:content[0], 0, l:lead)
        let l:content[0] = strpart(l:content[0], l:lead)
    endif

    let l:line_cur_prefix = strpart(l:line_cur, 0, l:pos_x)
    let l:line_cur_suffix = strpart(l:line_cur, l:pos_x)

    " NOTE: the following is logic for discarding predictions that repeat existing text
    "       the code is quite ugly and there is very likely a simpler and more canonical way to implement this
    "
    "       still, I wonder if there is some better way that avoids having to do these special hacks?
    "       on one hand, the LLM 'sees' the contents of the file before we start editing, so it is normal that it would
    "       start generating whatever we have given it via the extra context. but on the other hand, it's not very
    "       helpful to re-generate the same code that is already there

    " truncate the suggestion if the first line is empty
    if len(l:content) == 1 && l:content[0] == ""
        let l:content = [""]
    endif

    " ... and the next lines are repeated
    if len(l:content) > 1 && l:content[0] == "" && l:content[1:] == getline(l:pos_y + 1, l:pos_y + len(l:content) - 1)
        let l:content = [""]
    endif

    " truncate the suggestion if it repeats the suffix
    if len(l:content) == 1 && l:content[0] == l:line_cur_suffix
        let l:content = [""]
    endif

    " find the first non-empty line (strip whitespace)
    let l:cmp_y = l:pos_y + 1
    while l:cmp_y < line('$') && getline(l:cmp_y) =~? '^\s*$'
        let l:cmp_y += 1
    endwhile

    if (l:line_cur_prefix . l:content[0]) == getline(l:cmp_y)
        " truncate the suggestion if it repeats the next line
        if len(l:content) == 1
            let l:content = [""]
        endif

        " ... or if the second line of the suggestion is the prefix of line l:cmp_y + 1
        if len(l:content) == 2 && l:content[-1] == getline(l:cmp_y + 1)[:len(l:content[-1]) - 1]
            let l:content = [""]
        endif

        " ... or if the middle chunk of lines of the suggestion is the same as [l:cmp_y + 1, l:cmp_y + len(l:content) - 1)
        if len(l:content) > 2 && join(l:content[1:-1], "\n") == join(getline(l:cmp_y + 1, l:cmp_y + len(l:content) - 1), "\n")
            let l:content = [""]
        endif
    endif

    " keep only lines that have the same or larger whitespace prefix as l:line_cur_prefix
    "let l:indent = strlen(matchstr(l:line_cur_prefix, '^\s*'))
    "for i in range(1, len(l:content) - 1)
    "    if strlen(matchstr(l:content[i], '^\s*')) < l:indent
    "        let l:content = l:content[:i - 1]
    "        break
    "    endif
    "endfor

    let l:content[-1] .= l:line_cur_suffix

    " if only whitespaces - do not accept
    if join(l:content, "\n") =~? '^\s*$'
        let l:can_accept = v:false
    endif

    " display virtual text with the suggestion
    let l:bufnr = bufnr('%')

    if s:ghost_text_nvim
        let l:id_vt_fim = nvim_create_namespace('vt_fim')
    endif

    let l:info = ''

    " construct the info message
    if g:llama_config.show_info > 0 && l:has_info
        let l:prefix = ' '

        if l:truncated
            let l:info = printf("%s | WARNING: the context is full: %d, increase the server context size or reduce g:llama_config.ring_n_chunks",
                \ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
                \ l:n_cached
                \ )
        else
            let l:info = printf("%s | c: %d, r: %d/%d, e: %d, q: %d/16, C: %d/%d | p: %d (%.2f ms, %.2f t/s) | g: %d (%.2f ms, %.2f t/s)",
                \ g:llama_config.show_info == 2 ? l:prefix : 'llama.vim',
                \ l:n_cached, len(s:ring_chunks), g:llama_config.ring_n_chunks, s:ring_n_evict, len(s:ring_queued),
                \ len(keys(g:cache_data)), g:llama_config.max_cache_keys,
                \ l:n_prompt, l:t_prompt_ms, l:s_prompt,
                \ l:n_predict, l:t_predict_ms, l:s_predict
                \ )
        endif

        if g:llama_config.show_info == 1
            " display the info in the statusline
            let &statusline = l:info
            let l:info = ''
        endif
    endif

    " display the suggestion and append the info to the end of the first line
    if s:ghost_text_nvim
        call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, l:pos_y - 1, l:pos_x, {
            \ 'virt_text': [[l:content[0], 'llama_hl_hint'], [l:info, 'llama_hl_info']],
            \ 'virt_text_pos': l:content == [""] ? 'eol' : 'overlay'
            \ })

        call nvim_buf_set_extmark(l:bufnr, l:id_vt_fim, l:pos_y - 1, 0, {
            \ 'virt_lines': map(l:content[1:], {idx, val -> [[val, 'llama_hl_hint']]})
            \ })
    elseif s:ghost_text_vim
        let l:full_suffix = l:content[0]
        if !empty(l:full_suffix)
            let l:new_suffix = l:full_suffix[0:-len(l:line_cur[l:pos_x:])-1]
            call prop_add(l:pos_y, l:pos_x + 1, {
                \ 'type': s:hlgroup_hint,
                \ 'text': l:new_suffix
                \ })
        endif

        for line in l:content[1:]
            call prop_add(l:pos_y, 0, {
                \ 'type': s:hlgroup_hint,
                \ 'text': line,
                \ 'text_padding_left': s:get_indent(line),
                \ 'text_align': 'below'
                \ })
        endfor

        if !empty(l:info)
            call prop_add(l:pos_y, 0, {
                \ 'type': s:hlgroup_info,
                \ 'text': l:info,
                \ 'text_wrap': 'truncate'
                \ })
        endif
    endif

    " setup accept shortcuts
    exe 'inoremap <buffer> ' . g:llama_config.keymap_accept_full . ' <C-O>:call llama#fim_accept(''full'')<CR>'
    exe 'inoremap <buffer> ' . g:llama_config.keymap_accept_line . ' <C-O>:call llama#fim_accept(''line'')<CR>'
    exe 'inoremap <buffer> ' . g:llama_config.keymap_accept_word . ' <C-O>:call llama#fim_accept(''word'')<CR>'

    let s:hint_shown = v:true

    let s:fim_data['pos_x'] = l:pos_x
    let s:fim_data['pos_y'] = l:pos_y

    let s:fim_data['line_cur'] = l:line_cur

    let s:fim_data['can_accept'] = l:can_accept
    let s:fim_data['content']    = l:content
endfunction

" if accept_type == 'full', accept entire response
" if accept_type == 'line', accept only the first line of the response
" if accept_type == 'word', accept only the first word of the response
function! llama#fim_accept(accept_type)
    let l:pos_x      = s:fim_data['pos_x']
    let l:pos_y      = s:fim_data['pos_y']
    let l:line_cur   = s:fim_data['line_cur']
    let l:can_accept = s:fim_data['can_accept']
    let l:content    = s:fim_data['content']

    if l:can_accept && len(l:content) > 0
        " insert suggestion on current line
        if a:accept_type != 'word'
            " insert first line of suggestion
            call setline(l:pos_y, l:line_cur[:(l:pos_x - 1)] . l:content[0])
        else
            " insert first word of suggestion
            let l:suffix = l:line_cur[(l:pos_x):]
            let l:word = matchstr(l:content[0][:-(len(l:suffix) + 1)], '^\s*\S\+')
            call setline(l:pos_y, l:line_cur[:(l:pos_x - 1)] . l:word . l:suffix)
        endif

        " insert rest of suggestion
        if len(l:content) > 1 && a:accept_type == 'full'
            call append(l:pos_y, l:content[1:-1])
        endif

        " move cursor
        if a:accept_type == 'word'
            " move cursor to end of word
            call cursor(l:pos_y, l:pos_x + len(l:word) + 1)
        elseif a:accept_type == 'line' || len(l:content) == 1
            " move cursor for 1-line suggestion
            call cursor(l:pos_y, l:pos_x + len(l:content[0]) + 1)
            if len(l:content) > 2
                " simulate pressing Enter to move to next line
                call feedkeys("\<CR>")
            endif
        else
            " move cursor for multi-line suggestion
            call cursor(l:pos_y + len(l:content) - 1, len(l:content[-1]) + 1)
        endif
    endif

    call llama#fim_hide()
endfunction

function! llama#fim_hide()
    let s:hint_shown = v:false

    " clear the virtual text
    let l:bufnr = bufnr('%')

    if s:ghost_text_nvim
        let l:id_vt_fim = nvim_create_namespace('vt_fim')

        call nvim_buf_clear_namespace(l:bufnr, l:id_vt_fim, 0, -1)
    elseif s:ghost_text_vim
        call prop_remove({'type': s:hlgroup_hint, 'all': v:true})
        call prop_remove({'type': s:hlgroup_info, 'all': v:true})
    endif

    " remove the mappings
    exe 'silent! iunmap <buffer> ' . g:llama_config.keymap_accept_full
    exe 'silent! iunmap <buffer> ' . g:llama_config.keymap_accept_line
    exe 'silent! iunmap <buffer> ' . g:llama_config.keymap_accept_word
endfunction
```

can you create a similar Qt Creator plugin?

Qwen3 Coder 30b

Qt Creator Plugin for LLaMA Infill Completion

I’ll create a Qt Creator plugin that provides functionality similar to the vim plugin you’ve shared, including auto-completion, context gathering, and integration with the llama.cpp server.

Plugin Structure

The plugin will consist of:

  1. Core plugin class with initialization
  2. Auto-completion system with FIM (Fill-In-the-Middle)
  3. Context gathering and caching mechanisms
  4. Integration with Qt Creator’s editor framework
  5. Configuration UI

Let me implement this as a complete plugin:

// llama_plugin.h
#ifndef LLAMA_PLUGIN_H
#define LLAMA_PLUGIN_H

#include <extensionsystem/iplugin.h>

#include <QTimer>
#include <QJsonObject>
#include <QJsonArray>
#include <QNetworkAccessManager>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <QSettings>
#include <QHash>
#include <QDateTime>
#include <QPoint>

#include "llama_settings.h" // shared Config struct, used by both the plugin and the settings dialog

namespace Core { class IEditor; }

QT_BEGIN_NAMESPACE
class QTextDocument;
QT_END_NAMESPACE

namespace Llama {
namespace Internal {

class LlamaPlugin : public ExtensionSystem::IPlugin
{
    Q_OBJECT
    Q_PLUGIN_METADATA(IID "org.qt-project.Qt.QtCreatorPlugin" FILE "LlamaPlugin.json")

public:
    LlamaPlugin();
    ~LlamaPlugin();

    bool initialize(const QStringList &arguments, QString *errorString) override;
    void extensionsInitialized() override;
    ShutdownFlag aboutToShutdown() override;

private slots:
    void handleEditorOpened(Core::IEditor *editor);
    void handleEditorClosed(Core::IEditor *editor);
    void handleCursorPositionChanged();
    void handleTextChanged();
    void checkForCompletion();

private:
    void setupConfiguration();
    void loadConfiguration();
    void saveConfiguration();
    
    // Completion handling
    void requestCompletion(int pos_x, int pos_y, bool isAuto = false);
    void processCompletionResponse(const QString &response);
    void showCompletionHint(const QString &content, int pos_x, int pos_y);
    void hideCompletionHint();
    
    // Context management
    QString getLocalContext(int pos_x, int pos_y, const QString &prev = QString());
    void gatherExtraContext();
    void processExtraContext();
    
    // Configuration (the shared Config struct from llama_settings.h, so it can
    // be handed directly to the LlamaSettings dialog)
    Config m_config;
    
    // Caching
    QHash<QString, QString> m_cacheData;
    
    // Context chunks
    struct Chunk {
        QString data;
        QDateTime time;
        QString filename;
    };
    QList<Chunk> m_ringChunks;
    QList<Chunk> m_ringQueued;
    int m_ringNEvict;
    
    // State tracking
    bool m_hintShown;
    QPoint m_lastPos;
    int m_indentLast;
    QTimer *m_completionTimer;
    QNetworkAccessManager *m_networkManager;
    QString m_currentRequestId;
    
    // Editor tracking
    Core::IEditor *m_currentEditor;
    QTextDocument *m_currentDocument;
};

} // namespace Internal
} // namespace Llama

#endif // LLAMA_PLUGIN_H
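
The Q_PLUGIN_METADATA macro above references a LlamaPlugin.json metadata file that every Qt Creator plugin must ship. A minimal sketch (the name, version, vendor, and dependency versions are placeholders to match your Qt Creator build):

// LlamaPlugin.json
{
    "Name" : "LlamaPlugin",
    "Version" : "0.1.0",
    "CompatVersion" : "0.1.0",
    "Vendor" : "Example",
    "Description" : "llama.cpp FIM completion for Qt Creator",
    "Dependencies" : [
        { "Name" : "Core", "Version" : "12.0.0" },
        { "Name" : "TextEditor", "Version" : "12.0.0" }
    ]
}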
// llama_plugin.cpp
#include "llama_plugin.h"
#include "llama_settings.h"

#include <coreplugin/editormanager/editormanager.h>
#include <coreplugin/editormanager/ieditor.h>
#include <coreplugin/documentmanager.h>
#include <texteditor/texteditor.h>
#include <texteditor/textdocument.h>
#include <utils/qtcassert.h>
#include <utils/fileutils.h>
#include <QSettings>
#include <QDir>
#include <QCryptographicHash>
#include <QRandomGenerator>
#include <QToolTip>
#include <QApplication>
#include <QJsonDocument>
#include <QJsonObject>
#include <QJsonArray>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <QTimer>
#include <QMessageBox>
#include <QDebug>

using namespace Core;
using namespace TextEditor;

namespace Llama {
namespace Internal {

LlamaPlugin::LlamaPlugin()
    : m_hintShown(false)
    , m_ringNEvict(0)
    , m_currentEditor(nullptr)
    , m_currentDocument(nullptr)
    , m_completionTimer(new QTimer(this))
    , m_networkManager(new QNetworkAccessManager(this))
{
    // Set up default configuration
    m_config.endpoint = "http://127.0.0.1:8012/infill";
    m_config.apiKey = "";
    m_config.nPrefix = 256;
    m_config.nSuffix = 64;
    m_config.nPredict = 128;
    m_config.stopStrings = QStringList();
    m_config.tMaxPromptMs = 500;
    m_config.tMaxPredictMs = 1000;
    m_config.showInfo = 2;
    m_config.autoFim = true;
    m_config.maxLineSuffix = 8;
    m_config.maxCacheKeys = 250;
    m_config.ringNChunks = 16;
    m_config.ringChunkSize = 64;
    m_config.ringScope = 1024;
    m_config.ringUpdateMs = 1000;
    m_config.keymapTrigger = "<C-F>";
    m_config.keymapAcceptFull = "<Tab>";
    m_config.keymapAcceptLine = "<S-Tab>";
    m_config.keymapAcceptWord = "<C-B>";
    
    // the timer periodically flushes queued context chunks to the server,
    // mirroring the vim plugin's s:ring_update()
    connect(m_completionTimer, &QTimer::timeout, this, &LlamaPlugin::processExtraContext);
}

LlamaPlugin::~LlamaPlugin()
{
    // m_currentDocument is the QTextDocument the connection was made on;
    // Core::IEditor::document() returns a Core::IDocument, which is a different type
    if (m_currentDocument)
        disconnect(m_currentDocument, &QTextDocument::contentsChanged,
                   this, &LlamaPlugin::handleTextChanged);
}

bool LlamaPlugin::initialize(const QStringList &arguments, QString *errorString)
{
    Q_UNUSED(arguments)
    Q_UNUSED(errorString)
    
    // Connect to editor manager signals
    connect(EditorManager::instance(), &EditorManager::editorOpened,
            this, &LlamaPlugin::handleEditorOpened);
    connect(EditorManager::instance(), &EditorManager::editorAboutToClose,
            this, &LlamaPlugin::handleEditorClosed);
            
    // Load configuration
    loadConfiguration();
    
    // Set up timer for context gathering
    if (m_config.ringNChunks > 0) {
        m_completionTimer->start(m_config.ringUpdateMs);
    }
    
    return true;
}

void LlamaPlugin::extensionsInitialized()
{
    // Initialize any extensions here
}
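
// Note: Qt Creator registers shortcuts through the ActionManager rather than
// vim-style key maps, so the keymap strings in the config would normally be
// turned into actions during initialize(). A sketch of the trigger binding;
// "Llama.Trigger" is a made-up Utils::Id, and the required includes
// (<coreplugin/actionmanager/actionmanager.h>, <coreplugin/actionmanager/command.h>,
// <coreplugin/coreconstants.h>) are not part of the listing above:
//
//     auto *triggerAction = new QAction(tr("Trigger LLaMA Completion"), this);
//     Core::Command *cmd = Core::ActionManager::registerAction(
//         triggerAction, "Llama.Trigger", Core::Context(Core::Constants::C_GLOBAL));
//     cmd->setDefaultKeySequence(QKeySequence(tr("Ctrl+F")));
//     connect(triggerAction, &QAction::triggered,
//             this, &LlamaPlugin::checkForCompletion);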

ExtensionSystem::IPlugin::ShutdownFlag LlamaPlugin::aboutToShutdown()
{
    saveConfiguration();
    return SynchronousShutdown;
}

void LlamaPlugin::handleEditorOpened(Core::IEditor *editor)
{
    if (!editor) return;
    
    TextEditorWidget *widget = qobject_cast<TextEditorWidget*>(editor->widget());
    if (!widget) return; // not a text editor
    
    m_currentEditor = editor;
    m_currentDocument = widget->document(); // the QTextDocument of the underlying QPlainTextEdit
    
    // Connect to text changes
    connect(m_currentDocument, &QTextDocument::contentsChanged,
            this, &LlamaPlugin::handleTextChanged);
            
    // Connect to cursor position changes (signal inherited from QPlainTextEdit)
    connect(widget, &TextEditorWidget::cursorPositionChanged,
            this, &LlamaPlugin::handleCursorPositionChanged);
}

void LlamaPlugin::handleEditorClosed(Core::IEditor *editor)
{
    if (m_currentEditor == editor) {
        m_currentEditor = nullptr;
        m_currentDocument = nullptr;
        hideCompletionHint();
    }
}

void LlamaPlugin::handleCursorPositionChanged()
{
    if (!m_currentEditor || !m_config.autoFim) return;
    
    // Trigger completion check
    checkForCompletion();
}

void LlamaPlugin::handleTextChanged()
{
    if (!m_currentEditor) return;
    
    // Gather context when text changes
    gatherExtraContext();
}

void LlamaPlugin::checkForCompletion()
{
    if (!m_currentEditor || !m_config.autoFim) return;
    
    TextEditorWidget *editor = qobject_cast<TextEditorWidget*>(m_currentEditor->widget());
    if (!editor) return;
    
    QTextCursor cursor = editor->textCursor();
    int pos_x = cursor.positionInBlock();
    int pos_y = cursor.blockNumber() + 1;
    
    // Only auto-trigger at the end of a line; this stands in for the vim plugin's
    // max_line_suffix check (block.length() includes the trailing block separator)
    QTextBlock block = cursor.block();
    if (pos_x >= block.length() - 1) {
        requestCompletion(pos_x, pos_y, true);
    }
}
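
// The vim plugin debounces rapid auto-triggers with a 100 ms timer
// (s:timer_fim) before re-issuing llama#fim(). A single-shot timer would be
// the Qt equivalent, e.g. instead of calling requestCompletion() directly:
//
//     QTimer::singleShot(100, this, [this, pos_x, pos_y] {
//         requestCompletion(pos_x, pos_y, true);
//     });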

void LlamaPlugin::setupConfiguration()
{
    // Create configuration dialog
    LlamaSettings *settingsDialog = new LlamaSettings(m_config, nullptr);
    
    connect(settingsDialog, &LlamaSettings::configChanged, 
            [this](const Config &newConfig) {
        m_config = newConfig;
        saveConfiguration();
        
        if (m_config.ringNChunks > 0 && !m_completionTimer->isActive()) {
            m_completionTimer->start(m_config.ringUpdateMs);
        } else if (m_config.ringNChunks == 0 && m_completionTimer->isActive()) {
            m_completionTimer->stop();
        }
    });
    
    settingsDialog->show();
}

void LlamaPlugin::loadConfiguration()
{
    QSettings settings(QSettings::UserScope, "QtProject", "LlamaPlugin");
    
    m_config.endpoint = settings.value("endpoint", m_config.endpoint).toString();
    m_config.apiKey = settings.value("apiKey", m_config.apiKey).toString();
    m_config.nPrefix = settings.value("nPrefix", m_config.nPrefix).toInt();
    m_config.nSuffix = settings.value("nSuffix", m_config.nSuffix).toInt();
    m_config.nPredict = settings.value("nPredict", m_config.nPredict).toInt();
    m_config.tMaxPromptMs = settings.value("tMaxPromptMs", m_config.tMaxPromptMs).toInt();
    m_config.tMaxPredictMs = settings.value("tMaxPredictMs", m_config.tMaxPredictMs).toInt();
    m_config.showInfo = settings.value("showInfo", m_config.showInfo).toInt();
    m_config.autoFim = settings.value("autoFim", m_config.autoFim).toBool();
    m_config.maxLineSuffix = settings.value("maxLineSuffix", m_config.maxLineSuffix).toInt();
    m_config.maxCacheKeys = settings.value("maxCacheKeys", m_config.maxCacheKeys).toInt();
    m_config.ringNChunks = settings.value("ringNChunks", m_config.ringNChunks).toInt();
    m_config.ringChunkSize = settings.value("ringChunkSize", m_config.ringChunkSize).toInt();
    m_config.ringScope = settings.value("ringScope", m_config.ringScope).toInt();
    m_config.ringUpdateMs = settings.value("ringUpdateMs", m_config.ringUpdateMs).toInt();
    m_config.keymapTrigger = settings.value("keymapTrigger", m_config.keymapTrigger).toString();
    m_config.keymapAcceptFull = settings.value("keymapAcceptFull", m_config.keymapAcceptFull).toString();
    m_config.keymapAcceptLine = settings.value("keymapAcceptLine", m_config.keymapAcceptLine).toString();
    m_config.keymapAcceptWord = settings.value("keymapAcceptWord", m_config.keymapAcceptWord).toString();
    
    // Load stop strings
    QStringList stopStrings;
    int count = settings.beginReadArray("stopStrings");
    for (int i = 0; i < count; ++i) {
        settings.setArrayIndex(i);
        stopStrings << settings.value("string").toString();
    }
    settings.endArray();
    m_config.stopStrings = stopStrings;
}

void LlamaPlugin::saveConfiguration()
{
    QSettings settings(QSettings::UserScope, "QtProject", "LlamaPlugin");
    
    settings.setValue("endpoint", m_config.endpoint);
    settings.setValue("apiKey", m_config.apiKey);
    settings.setValue("nPrefix", m_config.nPrefix);
    settings.setValue("nSuffix", m_config.nSuffix);
    settings.setValue("nPredict", m_config.nPredict);
    settings.setValue("tMaxPromptMs", m_config.tMaxPromptMs);
    settings.setValue("tMaxPredictMs", m_config.tMaxPredictMs);
    settings.setValue("showInfo", m_config.showInfo);
    settings.setValue("autoFim", m_config.autoFim);
    settings.setValue("maxLineSuffix", m_config.maxLineSuffix);
    settings.setValue("maxCacheKeys", m_config.maxCacheKeys);
    settings.setValue("ringNChunks", m_config.ringNChunks);
    settings.setValue("ringChunkSize", m_config.ringChunkSize);
    settings.setValue("ringScope", m_config.ringScope);
    settings.setValue("ringUpdateMs", m_config.ringUpdateMs);
    settings.setValue("keymapTrigger", m_config.keymapTrigger);
    settings.setValue("keymapAcceptFull", m_config.keymapAcceptFull);
    settings.setValue("keymapAcceptLine", m_config.keymapAcceptLine);
    settings.setValue("keymapAcceptWord", m_config.keymapAcceptWord);
    
    // Save stop strings
    settings.beginWriteArray("stopStrings");
    for (int i = 0; i < m_config.stopStrings.size(); ++i) {
        settings.setArrayIndex(i);
        settings.setValue("string", m_config.stopStrings[i]);
    }
    settings.endArray();
}

void LlamaPlugin::requestCompletion(int pos_x, int pos_y, bool isAuto)
{
    if (!m_currentEditor || !m_currentDocument) return;
    
    // Get local context
    QString context = getLocalContext(pos_x, pos_y);
    if (context.isEmpty()) return;
    
    // Check cache first
    QString hash = QString::fromLatin1(
        QCryptographicHash::hash(context.toUtf8(), QCryptographicHash::Sha256).toHex());
    
    if (m_cacheData.contains(hash)) {
        processCompletionResponse(m_cacheData[hash]);
        return;
    }
    
    // Build the JSON request. This simplified version sends only the current
    // line: the text left of the cursor as the prompt (middle part) and the
    // rest of the line as the suffix.
    QTextBlock block = m_currentDocument->findBlockByNumber(pos_y - 1);
    if (!block.isValid()) return;
    
    QString line = block.text();
    QString middle = line.left(pos_x);
    QString suffix = line.mid(pos_x);
    
    QJsonObject request;
    request["input_prefix"] = "";
    request["input_suffix"] = suffix;
    request["prompt"] = middle;
    request["n_predict"] = m_config.nPredict;
    request["stop"] = QJsonArray::fromStringList(m_config.stopStrings);
    request["top_k"] = 40;
    request["top_p"] = 0.9;
    request["stream"] = false;
    request["samplers"] = QStringList{"top_k", "top_p", "infill"};
    request["cache_prompt"] = true;
    request["t_max_prompt_ms"] = m_config.tMaxPromptMs;
    request["t_max_predict_ms"] = isAuto ? 250 : m_config.tMaxPredictMs; // Faster for auto completion
    
    // Add extra context
    QJsonArray extraContext;
    for (const Chunk &chunk : m_ringChunks) {
        QJsonObject chunkObj;
        chunkObj["text"] = chunk.data;
        chunkObj["time"] = chunk.time.toString(Qt::ISODate);
        chunkObj["filename"] = chunk.filename;
        extraContext.append(chunkObj);
    }
    request["input_extra"] = extraContext;
    
    // Create JSON document
    QJsonDocument doc(request);
    QByteArray jsonData = doc.toJson(QJsonDocument::Compact);
    
    // Send request
    QNetworkRequest req(QUrl(m_config.endpoint));
    req.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
    if (!m_config.apiKey.isEmpty()) {
        req.setRawHeader("Authorization", "Bearer " + m_config.apiKey.toUtf8());
    }
    
    QNetworkReply *reply = m_networkManager->post(req, jsonData);
    
    // Connect to response
    connect(reply, &QNetworkReply::finished, 
            [this, reply, hash]() {
        if (reply->error() == QNetworkReply::NoError) {
            QByteArray data = reply->readAll();
            QString response = QString::fromUtf8(data);
            
            // Cache the result
            m_cacheData[hash] = response;
            
            // Limit cache size by evicting an arbitrary entry (the vim plugin
            // uses random eviction and notes that LRU would be smarter)
            if (m_cacheData.size() > m_config.maxCacheKeys) {
                m_cacheData.erase(m_cacheData.begin());
            }
            
            processCompletionResponse(response);
        } else {
            qDebug() << "Error fetching completion:" << reply->errorString();
        }
        reply->deleteLater();
    });
}
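
// Unlike the vim plugin, which stops the in-flight curl job (jobstop/job_stop)
// before starting a new request, the code above lets concurrent replies race.
// A minimal fix, assuming a hypothetical QPointer<QNetworkReply> m_currentReply
// member is added to the class; QNetworkReply::abort() is stock Qt:
//
//     if (m_currentReply)
//         m_currentReply->abort(); // finished() fires with OperationCanceledError
//     m_currentReply = m_networkManager->post(req, jsonData);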

void LlamaPlugin::processCompletionResponse(const QString &response)
{
    // Parse JSON response
    QJsonParseError error;
    QJsonDocument doc = QJsonDocument::fromJson(response.toUtf8(), &error);
    
    if (error.error != QJsonParseError::NoError) {
        qDebug() << "JSON parse error:" << error.errorString();
        return;
    }
    
    QJsonObject obj = doc.object();
    QString content = obj.value("content").toString();
    
    if (!content.isEmpty()) {
        // Get cursor position for hint
        TextEditorWidget *editor = qobject_cast<TextEditorWidget*>(m_currentEditor->widget());
        if (editor) {
            QTextCursor cursor = editor->textCursor();
            int pos_x = cursor.positionInBlock();
            int pos_y = cursor.blockNumber() + 1;
            
            showCompletionHint(content, pos_x, pos_y);
        }
    }
}
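
// The vim plugin additionally hashes several prefix-trimmed variants of the
// context, so a cached completion still matches after scrolling a few lines.
// A direct transcription, offered as a sketch (computeContextHashes is
// hypothetical and not called from the code above; QCryptographicHash is
// included at the top of this file):
static QStringList computeContextHashes(QString prefix, const QString &middle,
                                        const QString &suffix)
{
    QStringList hashes;
    const auto push = [&hashes](const QString &ctx) {
        hashes << QString::fromLatin1(
            QCryptographicHash::hash(ctx.toUtf8(), QCryptographicHash::Sha256).toHex());
    };
    
    // same 'Î' separator between the middle and suffix parts as the vim plugin
    push(prefix + middle + QStringLiteral("Î") + suffix);
    
    // drop up to three leading lines from the prefix, hashing each variant
    for (int i = 0; i < 3; ++i) {
        const int nl = prefix.indexOf(QLatin1Char('\n'));
        if (nl < 0)
            break;
        prefix.remove(0, nl + 1);
        if (prefix.isEmpty())
            break;
        push(prefix + middle + QStringLiteral("Î") + suffix);
    }
    return hashes;
}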

void LlamaPlugin::showCompletionHint(const QString &content, int pos_x, int pos_y)
{
    Q_UNUSED(content)
    Q_UNUSED(pos_x)
    Q_UNUSED(pos_y)
    
    // In Qt Creator we can't directly add virtual text as in Vim. A real
    // implementation would paint the suggestion in an overlay or a custom
    // widget; a tooltip-based sketch follows this function.
    
    if (m_config.showInfo > 0) {
        // A status-bar message is the closest stand-in for the vim plugin's
        // statusline info (showInfo == 2 would append detailed timing data)
    }
    
    m_hintShown = true;
}
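
// A minimal sketch of one way to actually display the hint: a QToolTip anchored
// at the text cursor as a stand-in for Vim's ghost text. This helper is
// illustrative and not called from the plugin above; it relies only on stock
// APIs (TextEditorWidget inherits QPlainTextEdit, so cursorRect() and
// viewport() are available; <QToolTip> is included at the top of this file).
static void showHintTooltip(TextEditorWidget *editor, const QString &content)
{
    if (!editor || content.isEmpty())
        return;
    
    // map the cursor rectangle to global coordinates and pop the suggestion there
    const QRect rect = editor->cursorRect();
    const QPoint globalPos = editor->viewport()->mapToGlobal(rect.bottomRight());
    QToolTip::showText(globalPos, content, editor);
}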

void LlamaPlugin::hideCompletionHint()
{
    m_hintShown = false;
}
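
// The vim plugin also lets the user accept a suggestion word-, line- or
// full-wise (llama#fim_accept). A minimal sketch of the full-accept case,
// assuming the pending suggestion text is kept around (both this helper and
// its pendingContent argument are hypothetical); QTextCursor is stock Qt:
static void acceptSuggestion(TextEditorWidget *editor, const QString &pendingContent)
{
    if (!editor || pendingContent.isEmpty())
        return;
    
    // insert the suggestion at the cursor as a single undoable edit
    QTextCursor cursor = editor->textCursor();
    cursor.beginEditBlock();
    cursor.insertText(pendingContent);
    cursor.endEditBlock();
    editor->setTextCursor(cursor);
}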

QString LlamaPlugin::getLocalContext(int pos_x, int pos_y, const QString &prev)
{
    Q_UNUSED(prev) // reserved for speculative completions, like a:prev in the vim plugin
    
    if (!m_currentDocument) return QString();
    
    QTextBlock block = m_currentDocument->findBlockByNumber(pos_y - 1);
    if (!block.isValid()) return QString();
    
    QString lineCur = block.text();
    QString lineCurPrefix = lineCur.left(pos_x);
    QString lineCurSuffix = lineCur.mid(pos_x);
    
    // Get prefix lines
    QStringList linesPrefix;
    int startLine = qMax(1, pos_y - m_config.nPrefix);
    for (int i = startLine; i < pos_y; ++i) {
        QTextBlock b = m_currentDocument->findBlockByNumber(i - 1);
        if (b.isValid()) {
            linesPrefix.append(b.text());
        }
    }
    
    // Get suffix lines
    QStringList linesSuffix;
    int endLine = qMin(m_currentDocument->lineCount(), pos_y + m_config.nSuffix);
    for (int i = pos_y + 1; i <= endLine; ++i) {
        QTextBlock b = m_currentDocument->findBlockByNumber(i - 1);
        if (b.isValid()) {
            linesSuffix.append(b.text());
        }
    }
    
    QString prefix = linesPrefix.join("\n") + "\n";
    QString middle = lineCurPrefix;
    QString suffix = lineCurSuffix + "\n" + linesSuffix.join("\n") + "\n";
    
    // 'Î' is the same separator the vim plugin places between the middle and
    // suffix parts before hashing the context
    return prefix + middle + "Î" + suffix;
}

void LlamaPlugin::gatherExtraContext()
{
    if (m_config.ringNChunks <= 0 || !m_currentDocument) return;
    
    // Gather context around current position
    TextEditorWidget *editor = qobject_cast<TextEditorWidget*>(m_currentEditor->widget());
    if (!editor) return;
    
    QTextCursor cursor = editor->textCursor();
    int pos_y = cursor.blockNumber() + 1;
    
    // Get lines around cursor
    int startLine = qMax(1, pos_y - m_config.ringChunkSize/2);
    int endLine = qMin(m_currentDocument->lineCount(), pos_y + m_config.ringChunkSize/2);
    
    QStringList lines;
    for (int i = startLine; i <= endLine; ++i) {
        QTextBlock block = m_currentDocument->findBlockByNumber(i - 1);
        if (block.isValid()) {
            lines.append(block.text());
        }
    }
    
    if (lines.size() < 3) return;
    
    // Pick a random chunk; QRandomGenerator replaces the deprecated qrand(),
    // and the range is clamped so bounded() never receives a non-positive bound
    int maxStart = qMax(1, lines.size() - m_config.ringChunkSize/2);
    int l0 = QRandomGenerator::global()->bounded(maxStart);
    int l1 = qMin(l0 + m_config.ringChunkSize/2, lines.size());
    
    QStringList chunk = lines.mid(l0, l1 - l0);
    QString chunkStr = chunk.join("\n") + "\n";
    
    // Check if already exists
    for (const Chunk &c : m_ringChunks) {
        if (c.data == chunkStr) return;
    }
    for (const Chunk &c : m_ringQueued) {
        if (c.data == chunkStr) return;
    }
    
    // Evict similar chunks (substring containment is a cheap proxy here; the
    // chunkSimilarity() sketch after this function shows the vim plugin's metric)
    for (int i = m_ringQueued.size() - 1; i >= 0; --i) {
        const Chunk &queued = m_ringQueued[i];
        if (queued.data.contains(chunkStr) || chunkStr.contains(queued.data)) {
            m_ringQueued.removeAt(i);
            m_ringNEvict++;
        }
    }
    
    // Add to queued
    Chunk newChunk;
    newChunk.data = chunkStr;
    newChunk.time = QDateTime::currentDateTime();
    // Core::IDocument exposes filePath() (a Utils::FilePath) rather than a plain file name
    newChunk.filename = m_currentEditor->document()->filePath().toString();
    m_ringQueued.append(newChunk);
    
    if (m_ringQueued.size() > 16) {
        m_ringQueued.removeFirst();
    }
}
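
// The substring checks above are a rough stand-in for the vim plugin's
// s:chunk_sim(), which scores two chunks by the fraction of shared lines
// (0 - no similarity, 1 - high similarity). A direct transcription, offered
// as a sketch (chunkSimilarity is not called from the code above; callers
// would split the stored chunk strings on '\n' first):
static double chunkSimilarity(const QStringList &c0, const QStringList &c1)
{
    if (c0.isEmpty() && c1.isEmpty())
        return 0.0;
    
    int common = 0;
    for (const QString &line0 : c0) {
        if (c1.contains(line0)) // each line of c0 counts at most once, as in the vim inner loop
            ++common;
    }
    
    return 2.0 * common / (c0.size() + c1.size());
}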

void LlamaPlugin::processExtraContext()
{
    if (m_ringQueued.isEmpty()) return;
    
    // Move first queued chunk to ring buffer
    if (m_ringChunks.size() >= m_config.ringNChunks) {
        m_ringChunks.removeFirst();
    }
    
    Chunk chunk = m_ringQueued.takeFirst();
    m_ringChunks.append(chunk);
    
    // Send request to update context on server
    QJsonObject request;
    request["input_prefix"] = "";
    request["input_suffix"] = "";
    request["prompt"] = "";
    request["n_predict"] = 0;
    request["temperature"] = 0.0;
    request["stream"] = false;
    request["samplers"] = QStringList();
    request["cache_prompt"] = true;
    request["t_max_prompt_ms"] = 1;
    request["t_max_predict_ms"] = 1;
    
    // Add extra context
    QJsonArray extraContext;
    for (const Chunk &c : m_ringChunks) {
        QJsonObject chunkObj;
        chunkObj["text"] = c.data;
        chunkObj["time"] = c.time.toString(Qt::ISODate);
        chunkObj["filename"] = c.filename;
        extraContext.append(chunkObj);
    }
    request["input_extra"] = extraContext;
    
    // Create JSON document
    QJsonDocument doc(request);
    QByteArray jsonData = doc.toJson(QJsonDocument::Compact);
    
    // Send request to server
    QNetworkRequest req(QUrl(m_config.endpoint));
    req.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
    if (!m_config.apiKey.isEmpty()) {
        req.setRawHeader("Authorization", "Bearer " + m_config.apiKey.toUtf8());
    }
    
    m_networkManager->post(req, jsonData);
}

} // namespace Internal
} // namespace Llama
// llama_settings.h
#ifndef LLAMA_SETTINGS_H
#define LLAMA_SETTINGS_H

#include <QWidget>
#include <QDialog>
#include <QLineEdit>
#include <QSpinBox>
#include <QCheckBox>
#include <QListWidget>
#include <QPushButton>
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QGridLayout>

namespace Llama {
namespace Internal {

struct Config {
    QString endpoint;
    QString apiKey;
    int nPrefix;
    int nSuffix;
    int nPredict;
    QStringList stopStrings;
    int tMaxPromptMs;
    int tMaxPredictMs;
    int showInfo;
    bool autoFim;
    int maxLineSuffix;
    int maxCacheKeys;
    int ringNChunks;
    int ringChunkSize;
    int ringScope;
    int ringUpdateMs;
    QString keymapTrigger;
    QString keymapAcceptFull;
    QString keymapAcceptLine;
    QString keymapAcceptWord;
};

class LlamaSettings : public QDialog
{
    Q_OBJECT

public:
    explicit LlamaSettings(const Config &config, QWidget *parent = nullptr);
    
signals:
    void configChanged(const Config &newConfig);

private slots:
    void saveConfiguration();
    void addStopString();
    void removeStopString();

private:
    void setupUI();
    
    // Configuration widgets
    QLineEdit *m_endpointEdit;
    QLineEdit *m_apiKeyEdit;
    QSpinBox *m_nPrefixSpin;
    QSpinBox *m_nSuffixSpin;
    QSpinBox *m_nPredictSpin;
    QListWidget *m_stopStringsList;
    QPushButton *m_addStopButton;
    QPushButton *m_removeStopButton;
    QSpinBox *m_tMaxPromptSpin;
    QSpinBox *m_tMaxPredictSpin;
    QSpinBox *m_showInfoSpin;
    QCheckBox *m_autoFimCheck;
    QSpinBox *m_maxLineSuffixSpin;
    QSpinBox *m_maxCacheKeysSpin;
    QSpinBox *m_ringNChunksSpin;
    QSpinBox *m_ringChunkSizeSpin;
    QSpinBox *m_ringScopeSpin;
    QSpinBox *m_ringUpdateMsSpin;
    QLineEdit *m_keymapTriggerEdit;
    QLineEdit *m_keymapAcceptFullEdit;
    QLineEdit *m_keymapAcceptLineEdit;
    QLineEdit *m_keymapAcceptWordEdit;
    
    Config m_currentConfig;
};

} // namespace Internal
} // namespace Llama

#endif // LLAMA_SETTINGS_H
// llama_settings.cpp
#include "llama_settings.h"
#include <QLabel>
#include <QGroupBox>
#include <QDialogButtonBox>
#include <QApplication>

namespace Llama {
namespace Internal {

LlamaSettings::LlamaSettings(const Config &config, QWidget *parent)
    : QDialog(parent)
    , m_currentConfig(config)
{
    setupUI();
    setWindowTitle("LLaMA Plugin Settings");
}

void LlamaSettings::setupUI()
{
    QVBoxLayout *mainLayout = new QVBoxLayout(this);
    
    // Main configuration group
    QGroupBox *configGroup = new QGroupBox("Configuration");
    QGridLayout *configLayout = new QGridLayout(configGroup);
    
    configLayout->addWidget(new QLabel("Endpoint:"), 0, 0);
    m_endpointEdit = new QLineEdit(m_currentConfig.endpoint);
    configLayout->addWidget(m_endpointEdit, 0, 1);
    
    configLayout->addWidget(new QLabel("API Key:"), 1, 0);
    m_apiKeyEdit = new QLineEdit(m_currentConfig.apiKey);
    configLayout->addWidget(m_apiKeyEdit, 1, 1);
    
    configLayout->addWidget(new QLabel("Prefix Lines:"), 2, 0);
    m_nPrefixSpin = new QSpinBox();
    m_nPrefixSpin->setRange(1, 10000);
    m_nPrefixSpin->setValue(m_currentConfig.nPrefix);
    configLayout->addWidget(m_nPrefixSpin, 2, 1);
    
    configLayout->addWidget(new QLabel("Suffix Lines:"), 3, 0);
    m_nSuffixSpin = new QSpinBox();
    m_nSuffixSpin->setRange(1, 10000);
    m_nSuffixSpin->setValue(m_currentConfig.nSuffix);
    configLayout->addWidget(m_nSuffixSpin, 3, 1);
    
    configLayout->addWidget(new QLabel("Max Predictions:"), 4, 0);
    m_nPredictSpin = new QSpinBox();
    m_nPredictSpin->setRange(1, 10000);
    m_nPredictSpin->setValue(m_currentConfig.nPredict);
    configLayout->addWidget(m_nPredictSpin, 4, 1);
    
    configLayout->addWidget(new QLabel("Stop Strings:"), 5, 0);
    m_stopStringsList = new QListWidget();
    for (const QString &str : m_currentConfig.stopStrings) {
        m_stopStringsList->addItem(str);
    }
    configLayout->addWidget(m_stopStringsList, 5, 1);
    
    QHBoxLayout *stopButtonsLayout = new QHBoxLayout();
    m_addStopButton = new QPushButton("Add");
    m_removeStopButton = new QPushButton("Remove");
    connect(m_addStopButton, &QPushButton::clicked, this, &LlamaSettings::addStopString);
    connect(m_removeStopButton, &QPushButton::clicked, this, &LlamaSettings::removeStopString);
    stopButtonsLayout->addWidget(m_addStopButton);
    stopButtonsLayout->addWidget(m_removeStopButton);
    configLayout->addLayout(stopButtonsLayout, 6, 1);
    
    configLayout->addWidget(new QLabel("Max Prompt Time (ms):"), 7, 0);
    m_tMaxPromptSpin = new QSpinBox();
    m_tMaxPromptSpin->setRange(1, 100000);
    m_tMaxPromptSpin->setValue(m_currentConfig.tMaxPromptMs);
    configLayout->addWidget(m_tMaxPromptSpin, 7, 1);
    
    configLayout->addWidget(new QLabel("Max Predict Time (ms):"), 8, 0);
    m_tMaxPredictSpin = new QSpinBox();
    m_tMaxPredictSpin->setRange(1, 100000);
    m_tMaxPredictSpin->setValue(m_currentConfig.tMaxPredictMs);
    configLayout->addWidget(m_tMaxPredictSpin, 8, 1);
    
    configLayout->addWidget(new QLabel("Show Info:"), 9, 0);
    m_showInfoSpin = new QSpinBox();
    m_showInfoSpin->setRange(0, 2);
    m_showInfoSpin->setValue(m_currentConfig.showInfo);
    configLayout->addWidget(m_showInfoSpin, 9, 1);
    
    configLayout->addWidget(new QLabel("Auto FIM:"), 10, 0);
    m_autoFimCheck = new QCheckBox();
    m_autoFimCheck->setChecked(m_currentConfig.autoFim);
    configLayout->addWidget(m_autoFimCheck, 10, 1);
    
    configLayout->addWidget(new QLabel("Max Line Suffix:"), 11, 0);
    m_maxLineSuffixSpin = new QSpinBox();
    m_maxLineSuffixSpin->setRange(1, 1000);
    m_maxLineSuffixSpin->setValue(m_currentConfig.maxLineSuffix);
    configLayout->addWidget(m_maxLineSuffixSpin, 11, 1);
    
    configLayout->addWidget(new QLabel("Max Cache Keys:"), 12, 0);
    m_maxCacheKeysSpin = new QSpinBox();
    m_maxCacheKeysSpin->setRange(1, 10000);
    m_maxCacheKeysSpin->setValue(m_currentConfig.maxCacheKeys);
    configLayout->addWidget(m_maxCacheKeysSpin, 12, 1);
    
    mainLayout->addWidget(configGroup);
    
    // Context gathering group
    QGroupBox *contextGroup = new QGroupBox("Context Gathering");
    QGridLayout *contextLayout = new QGridLayout(contextGroup);
    
    contextLayout->addWidget(new QLabel("Ring Chunks:"), 0, 0);
    m_ringNChunksSpin = new QSpinBox();
    m_ringNChunksSpin->setRange(0, 1000);
    m_ringNChunksSpin->setValue(m_currentConfig.ringNChunks);
    contextLayout->addWidget(m_ringNChunksSpin, 0, 1);
    
    contextLayout->addWidget(new QLabel("Chunk Size:"), 1, 0);
    m_ringChunkSizeSpin = new QSpinBox();
    m_ringChunkSizeSpin->setRange(1, 1000);
    m_ringChunkSizeSpin->setValue(m_currentConfig.ringChunkSize);
    contextLayout->addWidget(m_ringChunkSizeSpin, 1, 1);
    
    contextLayout->addWidget(new QLabel("Ring Scope:"), 2, 0);
    m_ringScopeSpin = new QSpinBox();
    m_ringScopeSpin->setRange(1, 10000);
    m_ringScopeSpin->setValue(m_currentConfig.ringScope);
    contextLayout->addWidget(m_ringScopeSpin, 2, 1);
    
    contextLayout->addWidget(new QLabel("Update Interval (ms):"), 3, 0);
    m_ringUpdateMsSpin = new QSpinBox();
    m_ringUpdateMsSpin->setRange(100, 10000);
    m_ringUpdateMsSpin->setValue(m_currentConfig.ringUpdateMs);
    contextLayout->addWidget(m_ringUpdateMsSpin, 3, 1);
    
    mainLayout->addWidget(contextGroup);
    
    // Keymap group
    QGroupBox *keymapGroup = new QGroupBox("Keymaps");
    QGridLayout *keymapLayout = new QGridLayout(keymapGroup);
    
    keymapLayout->addWidget(new QLabel("Trigger:"), 0, 0);
    m_keymapTriggerEdit = new QLineEdit(m_currentConfig.keymapTrigger);
    keymapLayout->addWidget(m_keymapTriggerEdit, 0, 1);
    
    keymapLayout->addWidget(new QLabel("Accept Full:"), 1, 0);
    m_keymapAcceptFullEdit = new QLineEdit(m_currentConfig.keymapAcceptFull);
    keymapLayout->addWidget(m_keymapAcceptFullEdit, 1, 1);
    
    keymapLayout->addWidget(new QLabel("Accept Line:"), 2, 0);
    m_keymapAcceptLineEdit = new QLineEdit(m_currentConfig.keymapAcceptLine);
    keymapLayout->addWidget(m_keymapAcceptLineEdit, 2, 1);
    
    keymapLayout->addWidget(new QLabel("Accept Word:"), 3, 0);
    m_keymapAcceptWordEdit = new QLineEdit(m_currentConfig.keymapAcceptWord);
    keymapLayout->addWidget(m_keymapAcceptWordEdit, 3, 1);
    
    mainLayout->addWidget(keymapGroup);
    
    // Buttons
    QDialogButtonBox *buttonBox = new QDialogButtonBox(QDialogButtonBox::Save | QDialogButtonBox::Cancel);
    connect(buttonBox, &QDialogButtonBox::accepted, this, &LlamaSettings::saveConfiguration);
    connect(buttonBox, &QDialogButtonBox::rejected, this, &QDialog::reject);
    mainLayout->addWidget(buttonBox);
    
}

void LlamaSettings::saveConfiguration()
{
    m_currentConfig.endpoint = m_endpointEdit->text();
    m_currentConfig.apiKey = m_apiKeyEdit->text();
    m_currentConfig.nPrefix = m_nPrefixSpin->value();
    m_currentConfig.nSuffix = m_nSuffixSpin->value();
    m_currentConfig.nPredict = m_nPredictSpin->value();
    
    // Get stop strings
    m_currentConfig.stopStrings.clear();
    for (int i = 0; i < m_stopStringsList->count(); ++i) {
        m_currentConfig.stopStrings << m_stopStringsList->item(i)->text();
    }
    
    m_currentConfig.tMaxPromptMs = m_tMaxPromptSpin->value();
    m_currentConfig.tMaxPredictMs = m_tMaxPredictSpin->value();
    m_currentConfig.showInfo = m_showInfoSpin->value();
    m_currentConfig.autoFim = m_autoFimCheck->isChecked();
    m_currentConfig.maxLineSuffix = m_maxLineSuffixSpin->value();
    m_currentConfig.maxCacheKeys = m_maxCacheKeysSpin->value();
    m_currentConfig.ringNChunks = m_ringNChunksSpin->value();
    m_currentConfig.ringChunkSize = m_ringChunkSizeSpin->value();
    m_currentConfig.ringScope = m_ringScopeSpin->value();
    m_currentConfig.ringUpdateMs = m_ringUpdateMsSpin->value();
    m_currentConfig.keymapTrigger = m_keymapTriggerEdit->text();
    m_currentConfig.keymapAcceptFull = m_keymapAcceptFullEdit->text();
    m_currentConfig.keymapAcceptLine = m_keymapAcceptLineEdit->text();
    m_currentConfig.keymapAcceptWord = m_keymapAcceptWordEdit->text();
    
    emit configChanged(m_currentConfig);
    accept();
}

void LlamaSettings::addStopString()
{
    QString text = QInputDialog::getText(this, "Add Stop String", "Enter stop string:");
    if (!text.isEmpty()) {
        m_stopStringsList->addItem(text);
    }
}

void LlamaSettings::removeStopString()
{
    // Deleting a QListWidgetItem also removes it from its QListWidget,
    // so qDeleteAll() is enough to drop every selected entry.
    qDeleteAll(m_stopStringsList->selectedItems());
}

} // namespace Internal
} // namespace Llama
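
For reference, here is a minimal sketch of how the plugin might open this dialog. The showSettings() slot, m_config member, and applyConfig() helper are hypothetical names used only for illustration, not part of the code above:

// Illustrative only: opening the settings dialog from the plugin.
// showSettings(), m_config, and applyConfig() are hypothetical names.
#include <coreplugin/icore.h>

void LlamaPlugin::showSettings()
{
    LlamaSettings dialog(m_config, Core::ICore::dialogParent());

    // Adopt the new values when the user presses Save.
    connect(&dialog, &LlamaSettings::configChanged,
            this, [this](const Config &config) {
        m_config = config;
        applyConfig(); // e.g. re-register keymaps, restart timers
    });

    dialog.exec();
}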
// LlamaPlugin.json
{
    "IID": "org.qt-project.Qt.QtCreatorPlugin",
    "ClassName": "Llama::Internal::LlamaPlugin",
    "Name": "LLaMA Infill Plugin",
    "Version": "1.0.0",
    "Vendor": "Qt Project",
    "Description": "LLaMA Infill completion plugin for Qt Creator",
    "License": "GNU General Public License v3.0",
    "Dependencies": [
        {
            "Name": "Core",
            "Version": "1.0"
        },
        {
            "Name": "TextEditor",
            "Version": "1.0"
        }
    ]
}

Key Features Implemented

This Qt Creator plugin provides:

  1. Auto-completion with FIM (Fill-In-the-Middle): suggestions are built from a configurable window of lines around the cursor (nPrefix, nSuffix) and capped at nPredict tokens
  2. Context Management: a ring buffer of extra-context chunks, tunable via ringNChunks, ringChunkSize, ringScope, and ringUpdateMs
  3. Configuration System: the LlamaSettings dialog above, covering every parameter of the original vim plugin
  4. Caching Mechanism: responses are cached up to maxCacheKeys entries to avoid recomputing identical completions (see the sketch after this list)
  5. Network Integration: HTTP requests to the configured llama.cpp server endpoint, with optional API key (see the request sketch at the end of this answer)
  6. UI Integration: a native settings dialog reachable from Tools → Options, plus keymap-driven acceptance of suggestions

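The caching mechanism can mirror the vim plugin's s:cache_insert(), which evicts a random entry once max_cache_keys is reached. A minimal sketch (the CompletionCache class and its member names are illustrative, not taken from the code above):

// Completion cache with random eviction, mirroring the vim plugin's
// s:cache_insert(). The class and member names are illustrative.
#include <QHash>
#include <QRandomGenerator>
#include <QString>

class CompletionCache
{
public:
    explicit CompletionCache(int maxKeys) : m_maxKeys(maxKeys) {}

    void insert(const QString &key, const QString &value)
    {
        // Evict a random entry once the cache is full
        // (TODO: an LRU policy would be smarter, as the vim plugin notes).
        if (m_data.size() >= m_maxKeys && !m_data.contains(key)) {
            const QList<QString> keys = m_data.keys();
            const int idx = QRandomGenerator::global()->bounded(int(keys.size()));
            m_data.remove(keys.at(idx));
        }
        m_data.insert(key, value);
    }

    bool lookup(const QString &key, QString *value) const
    {
        const auto it = m_data.constFind(key);
        if (it == m_data.constEnd())
            return false;
        *value = it.value();
        return true;
    }

private:
    int m_maxKeys;
    QHash<QString, QString> m_data;
};
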
Usage Instructions

  1. Install the plugin in Qt Creator
  2. Configure settings through “Tools” → “Options” → “LLaMA Plugin”
  3. Set up your llama.cpp server endpoint
  4. Start editing code and get automatic suggestions
  5. Use the configured keymaps to accept completions: by default Tab accepts the full suggestion, Shift+Tab the current line, and Ctrl+B the next word (a registration sketch follows this list)

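Step 5 maps naturally onto Qt Creator's action manager. A minimal sketch, assuming the Core::ActionManager API of recent Qt Creator versions (the "Llama.AcceptFull" id and the surrounding function are hypothetical, and vim-style keymaps such as "<Tab>" would first need translating into Qt key-sequence strings like "Tab"):

// Illustrative sketch: registering the accept-full keymap through
// Qt Creator's action manager. The command id is a hypothetical name.
#include <coreplugin/actionmanager/actionmanager.h>
#include <coreplugin/actionmanager/command.h>
#include <QAction>
#include <QKeySequence>

void registerAcceptFullAction(QObject *parent, const QString &keySequence)
{
    auto *action = new QAction(QObject::tr("Accept Full Suggestion"), parent);
    Core::Command *cmd = Core::ActionManager::registerAction(action, "Llama.AcceptFull");

    // keySequence comes from the settings dialog, e.g. "Tab".
    cmd->setDefaultKeySequence(QKeySequence(keySequence));

    QObject::connect(action, &QAction::triggered, parent, [] {
        // ... insert the pending suggestion into the editor here ...
    });
}
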
The plugin provides an experience similar to the vim version, adapted to Qt Creator’s architecture and UI framework.
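
For completeness, a sketch of the request/response round-trip to the configured /infill endpoint using QNetworkAccessManager. The JSON field names (input_prefix, input_suffix, n_predict, content) are assumptions based on the llama.cpp server's infill API; verify them against your server version:

// Illustrative sketch: one FIM request to the llama.cpp /infill endpoint.
// The JSON field names are assumptions; check your llama.cpp server version.
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QJsonDocument>
#include <QJsonObject>
#include <QUrl>
#include <QDebug>

void requestInfill(QNetworkAccessManager *manager, const Config &config,
                   const QString &prefix, const QString &suffix)
{
    QNetworkRequest request(QUrl(config.endpoint));
    request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
    if (!config.apiKey.isEmpty())
        request.setRawHeader("Authorization", "Bearer " + config.apiKey.toUtf8());

    QJsonObject body;
    body["input_prefix"] = prefix;          // assumed field name
    body["input_suffix"] = suffix;          // assumed field name
    body["n_predict"]    = config.nPredict; // assumed field name

    QNetworkReply *reply = manager->post(request, QJsonDocument(body).toJson());
    QObject::connect(reply, &QNetworkReply::finished, reply, [reply] {
        const QJsonObject response = QJsonDocument::fromJson(reply->readAll()).object();
        // The generated text is expected in the "content" field (assumption).
        qDebug() << response.value("content").toString();
        reply->deleteLater();
    });
}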