;Q Pentium Profiler v1.00

; See \txt\profiler.txt for help.

include src\qlib.inc
include stdio.inc
include dpmi.inc
include profiler.inc

;16-bit value that profiler_init writes to port 40h (low byte first, then
;high byte) to speed up the timer when _cpu < 5.
spdhi equ 000h          ;High byte of the fast timer reload value.
spdlo equ 010h          ;Low byte of the fast timer reload value.

.data
align 4
;Tick count incremented by the irq1 handler; read by profiler_start and
;profiler_stop when _cpu < 5.
counter dd ?
;Previous protected-mode INT 8 handler saved by profiler_init
;(offset in the low dword, selector in the word at +4).
oldIRQ1 df ?
;Interrupt masks saved by profiler_init:
;high byte = value read from port 21h, low byte = value read from port A1h.
om dw ?

;Event description table. profiler_print reads the dword at
;msgtbl + index*4 and hands it to printf for a %s conversion.
;NOTE(review): that usage needs every slot to be a 4-byte pointer to the
;text - confirm that the assembler/toolchain in use expands dd "text" that way.
;NOTE(review): Intel's published Pentium event list appears to give
;00h = data read, 01h = data write and 1Ah = stall on data memory READ -
;verify entries 00h, 01h and 1Ah against the Intel documentation.
msgtbl label byte
  dd "data write"                                 ;00h
  dd "data read"                                  ;01h
  dd "data TLB miss"                              ;02h
  dd "data read miss"                             ;03h
  dd "data write miss"                            ;04h
  dd "write (hit) to M or E state lines"          ;05h
  dd "data cache lines written back"              ;06h
  dd "data cache snoops"                          ;07h
  dd "data cache snoop hits"                      ;08h
  dd "memory accesses in both pipes"              ;09h
  dd "bank conflict"                              ;0Ah
  dd "misaligned data"                            ;0Bh
  dd "code read"                                  ;0Ch
  dd "code TLB miss"                              ;0Dh
  dd "code cache miss"                            ;0Eh
  dd "segment load"                               ;0Fh
  dd "????"                                       ;10h
  dd "????"                                       ;11h
  dd "jmps"                                       ;12h
  dd "BTB hits (Branch Target Buffer)"            ;13h
  dd "taken branch OR BTB hit"                    ;14h
  dd "pipeline flushes"                           ;15h
  dd "instructions executed"                      ;16h
  dd "instructions executed in V-pipe"            ;17h
  dd "bus utilization (clocks)"                   ;18h
  dd "pipeline stalled by write backup"           ;19h
  dd "pipeline stalled by data memory write"      ;1Ah
  dd "pipeline stalled by write to E or M line"   ;1Bh
  dd "locked bus cycles"                          ;1Ch
  dd "i/o read or write cycles"                   ;1Dh
  dd "non cacheable memory references"            ;1Eh
  dd "AGI"                                        ;1Fh
  dd "????"                                       ;20h
  dd "????"                                       ;21h
  dd "FPU operations"                             ;22h
  dd "breakpoint 0 match"                         ;23h
  dd "breakpoint 1 match"                         ;24h
  dd "breakpoint 2 match"                         ;25h
  dd "breakpoint 3 match"                         ;26h
  dd "hardware interrupts"                        ;27h
  dd "data read or data write"                    ;28h
  dd "data read miss or data write miss"          ;29h

.code

;Model-specific-register access, emitted as raw opcode bytes
;(presumably because the assembler has no mnemonics for them - confirm).
;Callers load the MSR number into ECX first.

;;_RDMSR : bytes 0F 32 ; MSR => EDX:EAX
_RDMSR macro
  db 0fh
  db 032h
endm

;;_WRMSR : bytes 0F 30 ; EDX:EAX => MSR
_WRMSR macro
  db 0fh
  db 030h
endm

;profiler_init : saves the interrupt masks and prepares the timing source.
;  _cpu < 5  : saves the current protected-mode INT 8 handler in oldIRQ1,
;              installs irq1 in its place, unmasks only the timer and the
;              keyboard, and reprograms the timer with spdhi:spdlo.
;  otherwise : unmasks only the keyboard (the PIT stays masked).
; IN  : nothing
; OUT : EAX = 1 if _cpu >= 5 and _cpl == 0 (MSRs available), else EAX = 0
; Undo with profiler_uninit.
profiler_init proc
  in al,021h
  shl ax,8
  in al,0a1h
  mov om,ax       ;save IRQ masks (AH = port 21h, AL = port A1h)

  .if _cpu < 5
    push ebx      ;FIX : EBX was clobbered here; the other profiler_*
                  ;      routines preserve it, so do the same.
    sub esp,sizeof pmPROCstruct   ;temporary pmPROCstruct on the stack
    mov ebx,esp
    callp _DPMI_getpmint,8,ebx
    mov ax,[ebx].pmPROCstruct._sel
    mov wptr[oldIRQ1+4],ax        ;remember old handler (selector)
    mov eax,[ebx].pmPROCstruct._off
    mov dptr[oldIRQ1],eax         ;remember old handler (offset)

    mov [ebx].pmPROCstruct._sel,cs
    mov [ebx].pmPROCstruct._off,offset irq1
    callp _DPMI_setpmint,8,ebx    ;INT 8 now goes to irq1
    add esp,sizeof pmPROCstruct
    pop ebx

    mov al,0ffh-3
    out 021h,al   ;enable only the timer and kbd
    mov al,0ffh
    out 0a1h,al

    ;setup a fast TIMER
    cli           ;keep the control byte and both count bytes together
    mov al,00110110b
    out 43h,al
    mov al,spdlo
    out 40h,al
    mov al,spdhi       ;1,191,180 = freq of timer
    out 40h,al  ;really fast
    sti
  .else   ;Pentium
    mov al,0ffh-2
    out 021h,al   ;enable only the kbd (PIT disabled!)
    mov al,0ffh
    out 0a1h,al
    .if _cpl == 0
      mov eax,1    ;MSRs available
      ret
    .endif
  .endif
  xor eax,eax     ;MSRs not available
  ret
profiler_init endp

align 4
;irq1 : interrupt handler installed on INT 8 by profiler_init (_cpu < 5).
;Bumps the tick counter, acknowledges the interrupt at port 20h and returns.
;The previous handler is NOT chained to.
;All registers are preserved.
irq1 proc private  ;v2.10 b2 : this was not private!
  push ds
  push eax        ;FIX : was "push ax" - a 16-bit push in this 32-bit handler
                  ;      (it returns with iretd) left the stack 2-byte
                  ;      misaligned and needed an operand-size prefix.
  mov ds,cs:seldata   ;DS is unknown on entry; load the data selector
  inc counter
  mov al,20h
  out 20h,al      ;end-of-interrupt
  pop eax
  pop ds
  iretd
irq1 endp

;profiler_uninit : undoes profiler_init.
;  _cpu < 5 : puts the timer back to its default rate and re-installs the
;             INT 8 handler that was saved in oldIRQ1.
;  always   : restores the interrupt masks saved in om.
; IN  : nothing
; OUT : nothing (EAX is modified)
profiler_uninit proc
  .if _cpu < 5
    cli           ;keep the control byte and both count bytes together
    mov al,00110110b
    out 43h,al
    xor al,al
    out 40h,al
    out 40h,al  ;Default speed (18.2 / sec)
    sti

    push ebx      ;FIX : EBX was clobbered here; the other profiler_*
                  ;      routines preserve it, so do the same.
    sub esp,sizeof pmPROCstruct   ;temporary pmPROCstruct on the stack
    mov ebx,esp
    mov ax,wptr[oldIRQ1+4]
    mov [ebx].pmPROCstruct._sel,ax
    mov eax,dptr[oldIRQ1]
    mov [ebx].pmPROCstruct._off,eax
    callp _DPMI_setpmint,8,ebx    ;old INT 8 handler is back
    add esp,sizeof pmPROCstruct
    pop ebx
  .endif

  mov ax,om       ;AH = saved port 21h mask, AL = saved port A1h mask
  out 0a1h,al
  shr ax,8
  out 021h,al
  ret
profiler_uninit endp

comment ~
Format of the "profiling register indexes":
bit 0..5 : type of the profiling register to access
bit 6    : set to monitor events in CPU rings 0, 1 and 2 (system level)
bit 7    : set to monitor events in CPU ring 3 (user level)
bit 8    : 0 = access the count of hardware events
           1 = access the count of total CPU cycles used to process the
               accumulated events
           (I am not sure of this; it may be that 0 counts time and
           1 counts events.)
bit 9..15: UNKNOWN, DO NOT MODIFY
~

align 4
;profiler_start : records the starting point of a measurement.
; IN  : pf = pointer to a profiler_info structure
; OUT : nothing; all registers are preserved (pushad/popad)
;  _cpu >= 5 : interrupts are disabled (profiler_stop re-enables them),
;              the cycle counter is stored in cnt2:cnt1 and, if _cpl == 0,
;              MSR 11h is programmed from idx1/idx2 and the current values
;              of MSR 12h / 13h are stored in idx1dh:idx1dl / idx2dh:idx2dl.
;  otherwise : the irq1 tick counter is stored in cnt1 and cnt2 is zeroed.
profiler_start proc,pf:dword
  pushad
  mov ebx,pf
  .if _cpu >= 5
    cli
    db 0fh,31h  ;EDX:EAX = cycle clock
    mov [ebx].profiler_info.cnt1,eax
    mov [ebx].profiler_info.cnt2,edx
    .if _cpl == 0
      ;setup MSRs
      mov ecx,11h
      _RDMSR  ;MSR => EDX:EAX
      xor ecx,ecx
      mov cl,[ebx].profiler_info.idx2   ;FIX : v2.10 : These were reversed
      shl ecx,16                        ;idx2 goes in the high word
      mov cl,[ebx].profiler_info.idx1   ;FIX : v2.10 : These were reversed
      and eax,0fe00fe00h      ;Do Not modify
      or  eax,ecx
      or  eax,000c000c0h      ;Set to monitor on CPL0,1,2,3
      mov ecx,11h
      _WRMSR  ;EDX:EAX => MSR
      mov ecx,12h
      _RDMSR  ;MSR => EDX:EAX
      mov [ebx].profiler_info.idx1dl,eax
      mov [ebx].profiler_info.idx1dh,edx
      mov ecx,13h
      _RDMSR  ;MSR => EDX:EAX
      mov [ebx].profiler_info.idx2dl,eax
      mov [ebx].profiler_info.idx2dh,edx
    .endif
    popad
    ret
  .else
    ;FIX : interrupts used to be disabled on this path too.  The counter
    ;      is only advanced by the irq1 handler, so with interrupts off
    ;      until profiler_stop it could never move.
    mov edx,counter
    mov [ebx].profiler_info.cnt1,edx
    mov [ebx].profiler_info.cnt2,0
    popad
    ret
  .endif
profiler_start endp

_64SUB macro
  ;;64bit subtraction
  ;; IN ->  EDX:EAX = 1st 64bit number
  ;;        EDI:ESI = 2nd 64bit number
  ;; OUT -> EDX:EAX = difference (NOT absolute; follow with _64ABS for that)
  ;; Equation -> EDX:EAX = EDX:EAX - EDI:ESI
  ;; FIX : the borrow used to be propagated with a conditional "dec edx";
  ;;       sbb gives the same EDX:EAX without the branch.
  sub eax,esi
  sbb edx,edi
endm

_64ABS macro
  ;;64bit absolute value
  ;;
  ;; IN ->  EDX:EAX = signed 64bit number
  ;; OUT -> EDX:EAX = | EDX:EAX |
  test edx,80000000h   ;FIX : v2.10 Beta #1 : Was only 6 zeros.
  .if !zero?
    ;;negative : negate by inverting all 64 bits and adding 1
    not eax
    not edx
    add eax,1
    adc edx,0          ;;carry out of the low dword goes into the high dword
  .endif
endm

align 4
;profiler_stop : turns the values stored by profiler_start into elapsed counts.
; IN  : pf = pointer to the profiler_info structure given to profiler_start
; OUT : nothing; all registers are preserved (pushad/popad).
;       Interrupts are enabled on return.
;  _cpu >= 5 : cnt2:cnt1 = | cycle clock now - cycle clock at start | and,
;              if _cpl == 0, idx1dh:idx1dl / idx2dh:idx2dl get the same
;              treatment using MSR 12h / 13h.
;  otherwise : cnt1 = irq1 ticks elapsed since profiler_start.
profiler_stop proc,pf:dword
  pushad
  mov ebx,pf
  .if _cpu >= 5
    db 0fh,31h  ;EDX:EAX = cycle clock
    mov esi,[ebx].profiler_info.cnt1
    mov edi,[ebx].profiler_info.cnt2
    _64SUB      ;EDX:EAX = now - start
    _64ABS
    mov [ebx].profiler_info.cnt1,eax
    mov [ebx].profiler_info.cnt2,edx
    .if _cpl == 0
      ;read MSRs
      mov ecx,12h
      _RDMSR  ;MSR => EDX:EAX
      mov esi,[ebx].profiler_info.idx1dl
      mov edi,[ebx].profiler_info.idx1dh
      _64SUB
      _64ABS
      mov [ebx].profiler_info.idx1dl,eax
      mov [ebx].profiler_info.idx1dh,edx
      mov ecx,13h
      _RDMSR  ;MSR => EDX:EAX
      mov esi,[ebx].profiler_info.idx2dl
      mov edi,[ebx].profiler_info.idx2dh
      _64SUB
      _64ABS
      mov [ebx].profiler_info.idx2dl,eax
      mov [ebx].profiler_info.idx2dh,edx
    .endif
    sti
    popad
    ret
  .else
    ;FIX : this used to compute start - now and negate it only when there
    ;      was NO borrow, so the normal case (now > start) stored
    ;      2^32 - elapsed.  now - start is the elapsed tick count, and it
    ;      stays correct across a wrap of the 32-bit counter.
    mov edx,counter
    sub edx,[ebx].profiler_info.cnt1
    mov [ebx].profiler_info.cnt1,edx
    sti
    popad
    ret
  .endif
profiler_stop endp

;profiler_print : prints the results stored in a profiler_info structure.
; IN  : pf = pointer to a profiler_info structure (after profiler_stop)
; OUT : nothing; EBX is preserved
;Always prints cnt2:cnt1.  When _cpu >= 5 and _cpl == 0 it also prints both
;MSR counters together with the matching description from msgtbl.
msgtbl_last    equ 41    ;index of the last entry in msgtbl
msgtbl_unknown equ 16    ;index of a "????" entry in msgtbl
profiler_print proc,pf:dword
  push ebx
  mov ebx,pf
  ;Print cnt1 and idx1d,idx2d...
  callp printf,"Counter = %10u:%10u\n",[ebx].profiler_info.cnt2,[ebx].profiler_info.cnt1
  .if _cpu >=5 && _cpl == 0
    xor eax,eax
    mov al,[ebx].profiler_info.idx1
    ;FIX : the index was used unchecked, so any value past the end of
    ;      msgtbl handed printf a garbage string pointer.  Only bits 0..5
    ;      select the event type; anything still out of range is shown
    ;      as "????".
    and eax,03fh
    .if eax > msgtbl_last
      mov eax,msgtbl_unknown
    .endif
    shl eax,2  ;Dwords
    add eax,offset msgtbl
    callp printf,"MSR1(%3u) = %10u:%10u (%s)\n",[ebx].profiler_info.idx1,[ebx].profiler_info.idx1dh,[ebx].profiler_info.idx1dl,dptr[eax]
    xor eax,eax
    mov al,[ebx].profiler_info.idx2
    and eax,03fh               ;same range check as for idx1
    .if eax > msgtbl_last
      mov eax,msgtbl_unknown
    .endif
    shl eax,2  ;Dwords
    add eax,offset msgtbl
    callp printf,"MSR2(%3u) = %10u:%10u (%s)\n",[ebx].profiler_info.idx2,[ebx].profiler_info.idx2dh,[ebx].profiler_info.idx2dl,dptr[eax]
  .endif
  pop ebx
  ret
profiler_print endp

_endseg

end
