sipsorcery's blog

Occasional posts about VoIP, SIP, WebRTC and Bitcoin.

sipsorcery.com response times SIP Sorcery Last 3 Hours
daily weekly
sipsorcery.com status

Building a video softphone part III.2

Capturing and converting the video stream from my webcam to an H.264 file didn’t prove to be as bad as I thought. It helped a lot that the Media Foundation SDK has a sample called MFCaptureToFile that does exactly the same thing, and the vast majority of the code I’ve used below has been copied and pasted directly from that sample.

Here’s the 5 second .mp4 video that represents the fruits of my labour.

The important code bits are shown below (just to reiterate, I barely know what I’m doing with this stuff, and I generally remove the error checking and memory management code to help me get the gist of the bits I’m interested in).

// Capture roughly 100 samples from the webcam media source and encode them
// into an H.264 stream inside "sample.mp4".
//
// This fragment relies on names declared outside of it: `hr` (HRESULT),
// `ppSource` (pointer to the IMFMediaSource* for the capture device) and
// `pConfig` (IMFAttributes* used to configure the source reader).

// Initialize the Media Foundation platform.
hr = MFStartup(MF_VERSION);
if (SUCCEEDED(hr))
{
  const WCHAR *pwszFileName = L"sample.mp4";

  // All interface pointers start out NULL so the cleanup code at the bottom
  // can tell which ones were actually created.
  IMFSinkWriter *pWriter = NULL;
  IMFSourceReader *pReader = NULL;
  IMFMediaType *pType = NULL;

  // Create the sink writer that produces the .mp4 file.
  hr = MFCreateSinkWriterFromURL(
    pwszFileName,
    NULL,
    NULL,
    &pWriter);
  if (FAILED(hr))
  {
    printf("Failed to create the sink writer.\n");
  }

  // Create the source reader.
  if (SUCCEEDED(hr))
  {
    hr = MFCreateSourceReaderFromMediaSource(*ppSource, pConfig, &pReader);
    if (FAILED(hr))
    {
      printf("Failed to create the source reader.\n");
    }
  }

  //GetCurrentMediaType(pReader);
  //ListModes(pReader);

  // Ask the reader which media type the webcam is currently delivering; the
  // encoder output type is derived from it.
  if (SUCCEEDED(hr))
  {
    hr = pReader->GetCurrentMediaType((DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM, &pType);
    if (FAILED(hr))
    {
      printf("Failed to get the current media type from the source reader.\n");
    }
  }

  if (SUCCEEDED(hr))
  {
    printf("Configuring H.264 sink.\n");

    // Set up the H.264 sink.
    hr = ConfigureEncoder(pType, pWriter);
    if (FAILED(hr))
    {
      printf("Configuring the H.264 sink failed.\n");
    }
  }

  if (SUCCEEDED(hr))
  {
    // Register the color converter DSP for this process, in the video
    // processor category. This will enable the sink writer to enumerate
    // the color converter when the sink writer attempts to match the
    // media types.
    //
    // The result is kept separate from `hr`: as in the original listing, a
    // failed registration does not stop the capture, because the converter
    // is only needed when the webcam format cannot be fed to the encoder
    // directly.
    HRESULT hrRegister = MFTRegisterLocalByCLSID(
      __uuidof(CColorConvertDMO),
      MFT_CATEGORY_VIDEO_PROCESSOR,
      L"",
      MFT_ENUM_FLAG_SYNCMFT,
      0,
      NULL,
      0,
      NULL);
    if (FAILED(hrRegister))
    {
      printf("Registering the color converter failed.\n");
    }

    hr = pWriter->SetInputMediaType(0, pType, NULL);
    if (FAILED(hr))
    {
      printf("Failure setting the input media type on the H.264 sink.\n");
    }
  }

  if (SUCCEEDED(hr))
  {
    hr = pWriter->BeginWriting();
    if (FAILED(hr))
    {
      printf("Failed to begin writing on the H.264 sink.\n");
    }
  }

  // Only read, write and finalize when BeginWriting succeeded.
  if (SUCCEEDED(hr))
  {
    DWORD streamIndex = 0, flags = 0;
    LONGLONG llTimeStamp = 0;
    BOOL bFirstSample = TRUE;
    LONGLONG llBaseTime = 0;
    int sampleCount = 0;

    printf("Recording...\n");

    while (sampleCount < 100) {
      IMFSample *pSample = NULL;

      hr = pReader->ReadSample(
        MF_SOURCE_READER_ANY_STREAM, // Stream index.
        0, // Flags.
        &streamIndex, // Receives the actual stream index.
        &flags, // Receives status flags.
        &llTimeStamp, // Receives the time stamp.
        &pSample // Receives the sample or NULL.
      );

      if (FAILED(hr))
      {
        printf("Read sample failed.\n");
        break;
      }

      wprintf(L"Stream %lu (%I64d)\n", streamIndex, llTimeStamp);

      if (pSample)
      {
        if (bFirstSample)
        {
          // Remember the first time stamp so the file starts at time zero.
          llBaseTime = llTimeStamp;
          bFirstSample = FALSE;
        }

        // rebase the time stamp
        llTimeStamp -= llBaseTime;

        hr = pSample->SetSampleTime(llTimeStamp);

        if (FAILED(hr))
        {
          printf("Set psample time failed.\n");
        }

        hr = pWriter->WriteSample(0, pSample);

        if (FAILED(hr))
        {
          printf("Write sample failed.\n");
        }

        // ReadSample hands back a reference on every sample; drop it here so
        // the loop does not leak one sample per iteration.
        pSample->Release();
      }

      sampleCount++;
    }

    printf("Finalising the capture.");

    hr = pWriter->Finalize();
  }

  // Release every Media Foundation object before shutting the platform down.
  if (pType)
  {
    pType->Release();
  }
  if (pReader)
  {
    pReader->Release();
  }
  if (pWriter)
  {
    pWriter->Release();
  }

  // Shut down Media Foundation.
  MFShutdown();
}

// Adds an H.264 output stream to the sink writer.
//
// The output media type is video / H.264 at an average bit rate of 240 kbit/s;
// frame size, frame rate, pixel aspect ratio and interlace mode are copied
// from the capture type so the encoded stream matches the source.
//
//   pType   - media type currently produced by the capture source.
//   pWriter - sink writer that receives the new stream.
//
// Returns S_OK on success, E_POINTER when either argument is NULL, or the
// HRESULT of the first Media Foundation call that failed.
HRESULT ConfigureEncoder(IMFMediaType *pType, IMFSinkWriter *pWriter)
{
  if (pType == NULL || pWriter == NULL)
  {
    return E_POINTER;
  }

  HRESULT hr = S_OK;

  IMFMediaType *pType2 = NULL;

  hr = MFCreateMediaType(&pType2);

  if (SUCCEEDED(hr))
  {
    hr = pType2->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
  }

  if (SUCCEEDED(hr))
  {
    hr = pType2->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
  }

  if (SUCCEEDED(hr))
  {
    hr = pType2->SetUINT32(MF_MT_AVG_BITRATE, 240 * 1000);
  }

  if (SUCCEEDED(hr))
  {
    hr = CopyAttribute(pType, pType2, MF_MT_FRAME_SIZE);
  }

  if (SUCCEEDED(hr))
  {
    hr = CopyAttribute(pType, pType2, MF_MT_FRAME_RATE);
  }

  if (SUCCEEDED(hr))
  {
    hr = CopyAttribute(pType, pType2, MF_MT_PIXEL_ASPECT_RATIO);
  }

  if (SUCCEEDED(hr))
  {
    hr = CopyAttribute(pType, pType2, MF_MT_INTERLACE_MODE);
  }

  if (SUCCEEDED(hr))
  {
    // The index of the new stream is not returned to the caller.
    DWORD dwStreamIndex = 0;
    hr = pWriter->AddStream(pType2, &dwStreamIndex);
  }

  // pType2 is still NULL when MFCreateMediaType failed, so guard the release.
  if (pType2)
  {
    pType2->Release();
  }

  return hr;
}

// Copies the attribute identified by `key` from one attribute store to
// another.
//
//   pSrc  - attribute store to read from.
//   pDest - attribute store to write to.
//   key   - GUID identifying the attribute to copy.
//
// Returns the HRESULT of GetItem when the read fails, otherwise the HRESULT
// of SetItem.
HRESULT CopyAttribute(IMFAttributes *pSrc, IMFAttributes *pDest, const GUID key)
{
  PROPVARIANT var;
  PropVariantInit(&var);

  HRESULT hr = S_OK;

  hr = pSrc->GetItem(key, &var);
  if (SUCCEEDED(hr))
  {
    hr = pDest->SetItem(key, var);
  }

  // Clear the PROPVARIANT on every path, whether or not the copy succeeded.
  PropVariantClear(&var);
  return hr;
}

One thing that’s missing is audio. I’ve got the video into the .mp4 file but I need an audio stream in there as well.

The next step is to get audio in and then try and check that the media file will be understood by a different video softphone, probably Counterpath’s Bria since I already have that installed.